diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX')
44 files changed, 3282 insertions, 1405 deletions
diff --git a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll index 23832a9..dd9a472 100644 --- a/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll +++ b/llvm/test/CodeGen/NVPTX/LoadStoreVectorizer.ll @@ -181,32 +181,32 @@ define void @combine_v16i8(ptr noundef align 16 %ptr1, ptr noundef align 16 %ptr ; ENABLED-NEXT: prmt.b32 %r5, %r4, 0, 0x7773U; ; ENABLED-NEXT: prmt.b32 %r6, %r4, 0, 0x7772U; ; ENABLED-NEXT: prmt.b32 %r7, %r4, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r8, %r4, 0, 0x7770U; -; ENABLED-NEXT: prmt.b32 %r9, %r3, 0, 0x7773U; -; ENABLED-NEXT: prmt.b32 %r10, %r3, 0, 0x7772U; -; ENABLED-NEXT: prmt.b32 %r11, %r3, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r12, %r3, 0, 0x7770U; -; ENABLED-NEXT: prmt.b32 %r13, %r2, 0, 0x7773U; -; ENABLED-NEXT: prmt.b32 %r14, %r2, 0, 0x7772U; -; ENABLED-NEXT: prmt.b32 %r15, %r2, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r16, %r2, 0, 0x7770U; -; ENABLED-NEXT: prmt.b32 %r17, %r1, 0, 0x7773U; -; ENABLED-NEXT: prmt.b32 %r18, %r1, 0, 0x7772U; -; ENABLED-NEXT: prmt.b32 %r19, %r1, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r20, %r1, 0, 0x7770U; +; ENABLED-NEXT: prmt.b32 %r8, %r3, 0, 0x7773U; +; ENABLED-NEXT: prmt.b32 %r9, %r3, 0, 0x7772U; +; ENABLED-NEXT: prmt.b32 %r10, %r3, 0, 0x7771U; +; ENABLED-NEXT: prmt.b32 %r11, %r2, 0, 0x7773U; +; ENABLED-NEXT: prmt.b32 %r12, %r2, 0, 0x7772U; +; ENABLED-NEXT: prmt.b32 %r13, %r2, 0, 0x7771U; +; ENABLED-NEXT: prmt.b32 %r14, %r1, 0, 0x7773U; +; ENABLED-NEXT: prmt.b32 %r15, %r1, 0, 0x7772U; +; ENABLED-NEXT: prmt.b32 %r16, %r1, 0, 0x7771U; ; ENABLED-NEXT: ld.param.b64 %rd2, [combine_v16i8_param_1]; -; ENABLED-NEXT: add.s32 %r21, %r20, %r19; -; ENABLED-NEXT: add.s32 %r22, %r21, %r18; -; ENABLED-NEXT: add.s32 %r23, %r22, %r17; -; ENABLED-NEXT: add.s32 %r24, %r23, %r16; -; ENABLED-NEXT: add.s32 %r25, %r24, %r15; -; ENABLED-NEXT: add.s32 %r26, %r25, %r14; -; ENABLED-NEXT: add.s32 %r27, %r26, %r13; -; ENABLED-NEXT: add.s32 %r28, %r27, %r12; -; ENABLED-NEXT: add.s32 %r29, %r28, %r11; -; ENABLED-NEXT: add.s32 %r30, %r29, %r10; -; ENABLED-NEXT: add.s32 %r31, %r30, %r9; -; ENABLED-NEXT: add.s32 %r32, %r31, %r8; +; ENABLED-NEXT: and.b32 %r17, %r1, 255; +; ENABLED-NEXT: and.b32 %r18, %r2, 255; +; ENABLED-NEXT: and.b32 %r19, %r3, 255; +; ENABLED-NEXT: and.b32 %r20, %r4, 255; +; ENABLED-NEXT: add.s32 %r21, %r17, %r16; +; ENABLED-NEXT: add.s32 %r22, %r21, %r15; +; ENABLED-NEXT: add.s32 %r23, %r22, %r14; +; ENABLED-NEXT: add.s32 %r24, %r23, %r18; +; ENABLED-NEXT: add.s32 %r25, %r24, %r13; +; ENABLED-NEXT: add.s32 %r26, %r25, %r12; +; ENABLED-NEXT: add.s32 %r27, %r26, %r11; +; ENABLED-NEXT: add.s32 %r28, %r27, %r19; +; ENABLED-NEXT: add.s32 %r29, %r28, %r10; +; ENABLED-NEXT: add.s32 %r30, %r29, %r9; +; ENABLED-NEXT: add.s32 %r31, %r30, %r8; +; ENABLED-NEXT: add.s32 %r32, %r31, %r20; ; ENABLED-NEXT: add.s32 %r33, %r32, %r7; ; ENABLED-NEXT: add.s32 %r34, %r33, %r6; ; ENABLED-NEXT: add.s32 %r35, %r34, %r5; @@ -332,36 +332,36 @@ define void @combine_v16i8_unaligned(ptr noundef align 8 %ptr1, ptr noundef alig ; ENABLED-NEXT: prmt.b32 %r3, %r2, 0, 0x7773U; ; ENABLED-NEXT: prmt.b32 %r4, %r2, 0, 0x7772U; ; ENABLED-NEXT: prmt.b32 %r5, %r2, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r6, %r2, 0, 0x7770U; -; ENABLED-NEXT: prmt.b32 %r7, %r1, 0, 0x7773U; -; ENABLED-NEXT: prmt.b32 %r8, %r1, 0, 0x7772U; -; ENABLED-NEXT: prmt.b32 %r9, %r1, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r10, %r1, 0, 0x7770U; +; ENABLED-NEXT: prmt.b32 %r6, %r1, 0, 0x7773U; +; ENABLED-NEXT: prmt.b32 %r7, %r1, 0, 0x7772U; +; ENABLED-NEXT: prmt.b32 %r8, %r1, 0, 0x7771U; ; ENABLED-NEXT: ld.param.b64 %rd2, [combine_v16i8_unaligned_param_1]; -; ENABLED-NEXT: ld.v2.b32 {%r11, %r12}, [%rd1+8]; -; ENABLED-NEXT: prmt.b32 %r13, %r12, 0, 0x7773U; -; ENABLED-NEXT: prmt.b32 %r14, %r12, 0, 0x7772U; -; ENABLED-NEXT: prmt.b32 %r15, %r12, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r16, %r12, 0, 0x7770U; -; ENABLED-NEXT: prmt.b32 %r17, %r11, 0, 0x7773U; -; ENABLED-NEXT: prmt.b32 %r18, %r11, 0, 0x7772U; -; ENABLED-NEXT: prmt.b32 %r19, %r11, 0, 0x7771U; -; ENABLED-NEXT: prmt.b32 %r20, %r11, 0, 0x7770U; -; ENABLED-NEXT: add.s32 %r21, %r10, %r9; -; ENABLED-NEXT: add.s32 %r22, %r21, %r8; -; ENABLED-NEXT: add.s32 %r23, %r22, %r7; -; ENABLED-NEXT: add.s32 %r24, %r23, %r6; +; ENABLED-NEXT: ld.v2.b32 {%r9, %r10}, [%rd1+8]; +; ENABLED-NEXT: prmt.b32 %r11, %r10, 0, 0x7773U; +; ENABLED-NEXT: prmt.b32 %r12, %r10, 0, 0x7772U; +; ENABLED-NEXT: prmt.b32 %r13, %r10, 0, 0x7771U; +; ENABLED-NEXT: prmt.b32 %r14, %r9, 0, 0x7773U; +; ENABLED-NEXT: prmt.b32 %r15, %r9, 0, 0x7772U; +; ENABLED-NEXT: prmt.b32 %r16, %r9, 0, 0x7771U; +; ENABLED-NEXT: and.b32 %r17, %r1, 255; +; ENABLED-NEXT: and.b32 %r18, %r2, 255; +; ENABLED-NEXT: and.b32 %r19, %r9, 255; +; ENABLED-NEXT: and.b32 %r20, %r10, 255; +; ENABLED-NEXT: add.s32 %r21, %r17, %r8; +; ENABLED-NEXT: add.s32 %r22, %r21, %r7; +; ENABLED-NEXT: add.s32 %r23, %r22, %r6; +; ENABLED-NEXT: add.s32 %r24, %r23, %r18; ; ENABLED-NEXT: add.s32 %r25, %r24, %r5; ; ENABLED-NEXT: add.s32 %r26, %r25, %r4; ; ENABLED-NEXT: add.s32 %r27, %r26, %r3; -; ENABLED-NEXT: add.s32 %r28, %r27, %r20; -; ENABLED-NEXT: add.s32 %r29, %r28, %r19; -; ENABLED-NEXT: add.s32 %r30, %r29, %r18; -; ENABLED-NEXT: add.s32 %r31, %r30, %r17; -; ENABLED-NEXT: add.s32 %r32, %r31, %r16; -; ENABLED-NEXT: add.s32 %r33, %r32, %r15; -; ENABLED-NEXT: add.s32 %r34, %r33, %r14; -; ENABLED-NEXT: add.s32 %r35, %r34, %r13; +; ENABLED-NEXT: add.s32 %r28, %r27, %r19; +; ENABLED-NEXT: add.s32 %r29, %r28, %r16; +; ENABLED-NEXT: add.s32 %r30, %r29, %r15; +; ENABLED-NEXT: add.s32 %r31, %r30, %r14; +; ENABLED-NEXT: add.s32 %r32, %r31, %r20; +; ENABLED-NEXT: add.s32 %r33, %r32, %r13; +; ENABLED-NEXT: add.s32 %r34, %r33, %r12; +; ENABLED-NEXT: add.s32 %r35, %r34, %r11; ; ENABLED-NEXT: st.b32 [%rd2], %r35; ; ENABLED-NEXT: ret; ; diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll index 7f52e52..abc873e 100644 --- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll +++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll @@ -16,8 +16,8 @@ define void @test_v2f32(<2 x float> %input, ptr %output) { ; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: call.uni (retval0), barv, (param0); ; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -32,24 +32,24 @@ define void @test_v2f32(<2 x float> %input, ptr %output) { define void @test_v3f32(<3 x float> %input, ptr %output) { ; CHECK-LABEL: test_v3f32( ; CHECK: { -; CHECK-NEXT: .reg .b32 %r<10>; -; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v3f32_param_0]; -; CHECK-NEXT: ld.param.b32 %r3, [test_v3f32_param_0+8]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_v3f32_param_0+8]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[16]; -; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; -; CHECK-NEXT: st.param.b32 [param0+8], %r3; ; CHECK-NEXT: .param .align 16 .b8 retval0[16]; +; CHECK-NEXT: st.param.b32 [param0+8], %r1; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: call.uni (retval0), barv3, (param0); -; CHECK-NEXT: ld.param.v2.b32 {%r4, %r5}, [retval0]; -; CHECK-NEXT: ld.param.b32 %r6, [retval0+8]; +; CHECK-NEXT: ld.param.b32 %r2, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; ; CHECK-NEXT: } // callseq 1 -; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_1]; -; CHECK-NEXT: st.v2.b32 [%rd1], {%r4, %r5}; -; CHECK-NEXT: st.b32 [%rd1+8], %r6; +; CHECK-NEXT: ld.param.b64 %rd4, [test_v3f32_param_1]; +; CHECK-NEXT: st.b32 [%rd4+8], %r2; +; CHECK-NEXT: st.b64 [%rd4], %rd2; ; CHECK-NEXT: ret; %call = tail call <3 x float> @barv3(<3 x float> %input) ; Make sure we don't load more values than than we need to. @@ -68,16 +68,16 @@ define void @test_a2f32([2 x float] %input, ptr %output) { ; CHECK-NEXT: ld.param.b32 %r2, [test_a2f32_param_0+4]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[8]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b32 [param0+4], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[8]; +; CHECK-NEXT: st.param.b32 [param0+4], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), bara, (param0); -; CHECK-NEXT: ld.param.b32 %r3, [retval0]; -; CHECK-NEXT: ld.param.b32 %r4, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r3, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r4, [retval0]; ; CHECK-NEXT: } // callseq 2 ; CHECK-NEXT: ld.param.b64 %rd1, [test_a2f32_param_1]; -; CHECK-NEXT: st.b32 [%rd1+4], %r4; -; CHECK-NEXT: st.b32 [%rd1], %r3; +; CHECK-NEXT: st.b32 [%rd1+4], %r3; +; CHECK-NEXT: st.b32 [%rd1], %r4; ; CHECK-NEXT: ret; %call = tail call [2 x float] @bara([2 x float] %input) store [2 x float] %call, ptr %output, align 4 @@ -95,16 +95,16 @@ define void @test_s2f32({float, float} %input, ptr %output) { ; CHECK-NEXT: ld.param.b32 %r2, [test_s2f32_param_0+4]; ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[8]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b32 [param0+4], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[8]; +; CHECK-NEXT: st.param.b32 [param0+4], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), bars, (param0); -; CHECK-NEXT: ld.param.b32 %r3, [retval0]; -; CHECK-NEXT: ld.param.b32 %r4, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r3, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r4, [retval0]; ; CHECK-NEXT: } // callseq 3 ; CHECK-NEXT: ld.param.b64 %rd1, [test_s2f32_param_1]; -; CHECK-NEXT: st.b32 [%rd1+4], %r4; -; CHECK-NEXT: st.b32 [%rd1], %r3; +; CHECK-NEXT: st.b32 [%rd1+4], %r3; +; CHECK-NEXT: st.b32 [%rd1], %r4; ; CHECK-NEXT: ret; %call = tail call {float, float} @bars({float, float} %input) store {float, float} %call, ptr %output, align 4 diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index ba5813c..b4641d0 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -208,13 +208,13 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0]; -; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1]; +; CHECK-NEXT: st.param.b32 [param1], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll index 4e11f58..46172b1 100644 --- a/llvm/test/CodeGen/NVPTX/bug26185-2.ll +++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll @@ -16,7 +16,7 @@ define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, p ; CHECK: .maxntid 1, 1, 1 ; CHECK-NEXT: { ; CHECK-NEXT: .reg .b32 %r<2>; -; CHECK-NEXT: .reg .b64 %rd<9>; +; CHECK-NEXT: .reg .b64 %rd<8>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %bb ; CHECK-NEXT: ld.param.b64 %rd1, [spam_param_0]; @@ -25,10 +25,9 @@ define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, p ; CHECK-NEXT: add.s64 %rd4, %rd1, %rd3; ; CHECK-NEXT: ld.param.b64 %rd5, [spam_param_1]; ; CHECK-NEXT: ld.global.nc.s16 %r1, [%rd4+16]; -; CHECK-NEXT: mul.wide.s32 %rd6, %r1, %r1; -; CHECK-NEXT: ld.global.b64 %rd7, [%rd5]; -; CHECK-NEXT: add.s64 %rd8, %rd6, %rd7; -; CHECK-NEXT: st.global.b64 [%rd5], %rd8; +; CHECK-NEXT: ld.global.b64 %rd6, [%rd5]; +; CHECK-NEXT: mad.wide.s32 %rd7, %r1, %r1, %rd6; +; CHECK-NEXT: st.global.b64 [%rd5], %rd7; ; CHECK-NEXT: ret; bb: %tmp5 = add nsw i64 %arg3, 8 diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll index ad9e4b0..b4934e1a 100644 --- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll +++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll @@ -13,12 +13,12 @@ define void @foo() { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.global.b64 %rd1, [G]; -; CHECK-NEXT: ld.global.b64 %rd2, [G+8]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.b64 [param0+8], %rd2; +; CHECK-NEXT: ld.global.b64 %rd1, [G+8]; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: ld.global.b64 %rd2, [G]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: call.uni bar, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 0cd7058..0eb7f64 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -44,11 +44,11 @@ entry: %arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3 store float %3, ptr %arrayidx7, align 4 -; CHECK: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]] -; CHECK-NEXT: .param .b64 param1; -; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]] -; CHECK-NEXT: call.uni callee, +; CHECK-DAG: .param .b64 param0; +; CHECK-DAG: .param .b64 param1; +; CHECK-DAG: st.param.b64 [param0], %rd[[A_REG]] +; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG]] +; CHECK: call.uni callee, call void @callee(ptr %a, ptr %buf) #2 ret void diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll index f67145d..483d48a 100644 --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -14,11 +14,11 @@ target triple = "nvptx64-nvidia-cuda" %complex_half = type { half, half } ; CHECK: .param .align 2 .b8 param2[4]; -; CHECK: st.param.b16 [param2], %rs1; -; CHECK: st.param.b16 [param2+2], %rs2; ; CHECK: .param .align 2 .b8 retval0[4]; -; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]); -; CHECK-NEXT: call (retval0), +; CHECK-DAG: st.param.b16 [param2], %rs{{[0-9]+}}; +; CHECK-DAG: st.param.b16 [param2+2], %rs{{[0-9]+}}; +; CHECK: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]); +; CHECK: call (retval0), define weak_odr void @foo() { entry: %call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null) @@ -36,10 +36,10 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) { } define void @boom() { %fp = call ptr @usefp(ptr @callee) - ; CHECK: .param .align 2 .b8 param0[4]; - ; CHECK: st.param.b16 [param0], %rs1; - ; CHECK: st.param.b16 [param0+2], %rs2; - ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); + ; CHECK-DAG: .param .align 2 .b8 param0[4]; + ; CHECK-DAG: st.param.b16 [param0], %rs{{[0-9]+}}; + ; CHECK-DAG: st.param.b16 [param0+2], %rs{{[0-9]+}}; + ; CHECK-DAG: .callprototype ()_ (.param .align 2 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) ret void } diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll index 2232810..da303b7 100644 --- a/llvm/test/CodeGen/NVPTX/combine-mad.ll +++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll @@ -199,10 +199,10 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: add.s32 %r5, %r3, %r4; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b32 param0; -; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: .param .b32 param1; -; CHECK-NEXT: st.param.b32 [param1], %r5; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.b32 [param0], %r3; +; CHECK-NEXT: st.param.b32 [param1], %r5; ; CHECK-NEXT: call.uni (retval0), use, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/combine-wide.ll b/llvm/test/CodeGen/NVPTX/combine-wide.ll new file mode 100644 index 0000000..ed4a2b6 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/combine-wide.ll @@ -0,0 +1,1339 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -O1 | FileCheck %s --check-prefixes=CHECK,O1 +; RUN: llc < %s -O0 | FileCheck %s --check-prefixes=CHECK,O0 + +target triple = "nvptx64-nvidia-cuda" + +define i64 @t1(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t1( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t1_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t1_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t1_param_2]; +; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t1( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t1_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t1_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t1_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd1, %rd2; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + %add = add i64 %c, %sext + ret i64 %add +} + +define i64 @t2(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t2( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t2_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t2_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t2_param_2]; +; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t2( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t2_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t2_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t2_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd2, %rd1; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + %add = add i64 %sext, %c + ret i64 %add +} + +define i64 @t3(i32 %a, i32 %b) { +; +; O1-LABEL: t3( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t3_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t3_param_1]; +; O1-NEXT: mad.wide.s32 %rd1, %r1, %r2, 1; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t3( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t3_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t3_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: add.s64 %rd2, %rd1, 1; +; O0-NEXT: st.param.b64 [func_retval0], %rd2; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + %add = add i64 1, %sext + ret i64 %add +} + +define i64 @t4(i32 %a, i64 %c) { +; +; O1-LABEL: t4( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t4_param_0]; +; O1-NEXT: ld.param.b64 %rd1, [t4_param_1]; +; O1-NEXT: mad.wide.s32 %rd2, %r1, 3, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t4( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t4_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t4_param_0]; +; O0-NEXT: mul.lo.s32 %r2, %r1, 3; +; O0-NEXT: cvt.s64.s32 %rd2, %r2; +; O0-NEXT: add.s64 %rd3, %rd1, %rd2; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, 3 + %sext = sext i32 %mul to i64 + %add = add i64 %c, %sext + ret i64 %add +} + +define i64 @t4_1(i32 %a, i64 %c) { +; +; O1-LABEL: t4_1( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t4_1_param_0]; +; O1-NEXT: mad.wide.s32 %rd1, %r1, 3, 5; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t4_1( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t4_1_param_0]; +; O0-NEXT: mul.lo.s32 %r2, %r1, 3; +; O0-NEXT: cvt.s64.s32 %rd1, %r2; +; O0-NEXT: add.s64 %rd2, %rd1, 5; +; O0-NEXT: st.param.b64 [func_retval0], %rd2; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, 3 + %sext = sext i32 %mul to i64 + %add = add i64 5, %sext + ret i64 %add +} + +define i64 @t5(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t5( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t5_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t5_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t5_param_2]; +; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t5( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t5_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t5_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t5_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd1, %rd2; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %zext = zext i32 %mul to i64 + %add = add i64 %c, %zext + ret i64 %add +} + +define i64 @t6(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t6( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t6_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t6_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t6_param_2]; +; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t6( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t6_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t6_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t6_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd2, %rd1; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %zext = zext i32 %mul to i64 + %add = add i64 %zext, %c + ret i64 %add +} + +define i32 @t7(i16 %a, i16 %b) { +; +; O1-LABEL: t7( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t7_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t7_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t7( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t7_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t7_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t8(i16 %a, i16 %b) { +; +; O1-LABEL: t8( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t8_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t8_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.s32.s16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t8( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t8_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t8_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t9(i32 %a, i32 %b) { +; +; O1-LABEL: t9( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t9_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t9_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.u64.u32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t9( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t9_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t9_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul i32 %a, %b + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t10(i32 %a, i32 %b) { +; +; O1-LABEL: t10( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t10_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t10_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.s64.s32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t10( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t10_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t10_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t11(i16 %a, i16 %b) { +; +; O1-LABEL: t11( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t11_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t11_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t11( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t11_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t11_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t12(i16 %a, i16 %b) { +; +; O1-LABEL: t12( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t12_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t12_param_1]; +; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t12( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t12_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t12_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t13(i32 %a, i32 %b) { +; +; O1-LABEL: t13( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t13_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t13_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.u64.u32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t13( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t13_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t13_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t14(i32 %a, i32 %b) { +; +; O1-LABEL: t14( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t14_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t14_param_1]; +; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t14( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t14_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t14_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t15(i16 %a, i16 %b) { +; +; O1-LABEL: t15( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t15_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t15_param_1]; +; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t15( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t15_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t15_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t16(i16 %a, i16 %b) { +; +; O1-LABEL: t16( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t16_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t16_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.s32.s16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t16( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t16_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t16_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t17(i32 %a, i32 %b) { +; +; O1-LABEL: t17( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t17_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t17_param_1]; +; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t17( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t17_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t17_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t18(i32 %a, i32 %b) { +; +; O1-LABEL: t18( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t18_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t18_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.s64.s32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t18( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t18_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t18_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t19(i16 %a, i16 %b) { +; +; O1-LABEL: t19( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t19_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t19_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t19( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t19_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t19_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t20(i16 %a) { +; +; CHECK-LABEL: t20( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [t20_param_0]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 4; +; CHECK-NEXT: cvt.s32.s16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %mul = shl i16 %a, 4 + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t21(i32 %a) { +; +; CHECK-LABEL: t21( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t21_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl i32 %a, 4 + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t22(i32 %a) { +; +; CHECK-LABEL: t22( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t22_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.s64.s32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl i32 %a, 4 + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t23(i16 %a, i16 %b) { +; +; CHECK-LABEL: t23( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [t23_param_0]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 4; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %mul = shl nsw i16 %a, 4 + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t24(i16 %a, i16 %b) { +; +; O1-LABEL: t24( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t24_param_0]; +; O1-NEXT: mul.wide.s16 %r1, %rs1, 16; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t24( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [t24_param_0]; +; O0-NEXT: shl.b16 %rs2, %rs1, 4; +; O0-NEXT: cvt.s32.s16 %r1, %rs2; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = shl nsw i16 %a, 4 + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t25(i32 %a) { +; +; CHECK-LABEL: t25( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t25_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl nsw i32 %a, 4 + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t26(i32 %a) { +; +; O1-LABEL: t26( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t26_param_0]; +; O1-NEXT: mul.wide.s32 %rd1, %r1, 16; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t26( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t26_param_0]; +; O0-NEXT: shl.b32 %r2, %r1, 4; +; O0-NEXT: cvt.s64.s32 %rd1, %r2; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = shl nsw i32 %a, 4 + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t27(i16 %a, i16 %b) { +; +; O1-LABEL: t27( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t27_param_0]; +; O1-NEXT: mul.wide.u16 %r1, %rs1, 16; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t27( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [t27_param_0]; +; O0-NEXT: shl.b16 %rs2, %rs1, 4; +; O0-NEXT: cvt.u32.u16 %r1, %rs2; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = shl nuw i16 %a, 4 + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t28(i16 %a, i16 %b) { +; +; CHECK-LABEL: t28( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [t28_param_0]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 4; +; CHECK-NEXT: cvt.s32.s16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %mul = shl nuw i16 %a, 4 + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t29(i32 %a) { +; +; O1-LABEL: t29( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t29_param_0]; +; O1-NEXT: mul.wide.u32 %rd1, %r1, 16; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t29( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t29_param_0]; +; O0-NEXT: shl.b32 %r2, %r1, 4; +; O0-NEXT: cvt.u64.u32 %rd1, %r2; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = shl nuw i32 %a, 4 + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t30(i32 %a) { +; +; CHECK-LABEL: t30( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t30_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.s64.s32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl nuw i32 %a, 4 + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i64 @t31(i32 %a, i32 %b) { +; +; O1-LABEL: t31( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t31_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t31_param_1]; +; O1-NEXT: shl.b32 %r3, %r1, %r2; +; O1-NEXT: cvt.s64.s32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t31( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t31_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t31_param_0]; +; O0-NEXT: shl.b32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = shl nuw i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t32(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t32( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t32_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t32_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t32_param_2]; +; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t32( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t32_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t32_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t32_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + %add = add i32 %c, %sext + ret i32 %add +} + +define i32 @t33(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t33( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t33_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t33_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t33_param_2]; +; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t33( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t33_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t33_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t33_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + %add = add i32 %c, %sext + ret i32 %add +} + +define i32 @t34(i16 %a, i16 %b) { +; +; O1-LABEL: t34( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t34_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t34_param_1]; +; O1-NEXT: mad.wide.s16 %r1, %rs1, %rs2, 1; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t34( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t34_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t34_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: add.s32 %r2, %r1, 1; +; O0-NEXT: st.param.b32 [func_retval0], %r2; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + %add = add i32 1, %sext + ret i32 %add +} + +define i32 @t35(i16 %a, i32 %c) { +; +; O1-LABEL: t35( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t35_param_0]; +; O1-NEXT: ld.param.b32 %r1, [t35_param_1]; +; O1-NEXT: mad.wide.s16 %r2, %rs1, 3, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t35( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t35_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t35_param_0]; +; O0-NEXT: mul.lo.s16 %rs2, %rs1, 3; +; O0-NEXT: cvt.s32.s16 %r2, %rs2; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, 3 + %sext = sext i16 %mul to i32 + %add = add i32 %c, %sext + ret i32 %add +} + +define i32 @t36(i16 %a, i32 %c) { +; +; O1-LABEL: t36( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t36_param_0]; +; O1-NEXT: mad.wide.s16 %r1, %rs1, 3, 5; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t36( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [t36_param_0]; +; O0-NEXT: mul.lo.s16 %rs2, %rs1, 3; +; O0-NEXT: cvt.s32.s16 %r1, %rs2; +; O0-NEXT: add.s32 %r2, %r1, 5; +; O0-NEXT: st.param.b32 [func_retval0], %r2; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, 3 + %sext = sext i16 %mul to i32 + %add = add i32 5, %sext + ret i32 %add +} + +define i32 @t37(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t37( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t37_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t37_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t37_param_2]; +; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t37( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t37_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t37_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t37_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i32 + %add = add i32 %c, %zext + ret i32 %add +} + +define i32 @t38(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t38( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t38_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t38_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t38_param_2]; +; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t38( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t38_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t38_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t38_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r2, %r1; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i32 + %add = add i32 %zext, %c + ret i32 %add +} + +define i64 @t39(i16 %a, i16 %b) { +; O1-LABEL: t39( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t39_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t39_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u64.u16 %rd1, %rs3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t39( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t39_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t39_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u64.u16 %rd1, %rs3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %zext = zext i16 %mul to i64 + ret i64 %zext +} + +define i64 @t40(i16 %a, i16 %b) { +; O1-LABEL: t40( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t40_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t40_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u64.u16 %rd1, %rs3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t40( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t40_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t40_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u64.u16 %rd1, %rs3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i64 + ret i64 %zext +} + +define i64 @t41(i16 %a, i16 %b) { +; O1-LABEL: t41( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t41_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t41_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.s64.s16 %rd1, %rs3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t41( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t41_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t41_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s64.s16 %rd1, %rs3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i64 + ret i64 %sext +} + +define i32 @t42(i16 %a, i16 %b, ptr %ptr) { +; O1-LABEL: t42( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t42_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t42_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: ld.param.b64 %rd1, [t42_param_2]; +; O1-NEXT: st.b16 [%rd1], %rs3; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t42( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t42_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t42_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t42_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: st.b16 [%rd1], %rs3; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + store i16 %mul, ptr %ptr + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t43(i16 %a, i16 %b, i32 %c, ptr %ptr) { +; O1-LABEL: t43( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t43_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t43_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: ld.param.b64 %rd1, [t43_param_3]; +; O1-NEXT: st.b16 [%rd1], %rs3; +; O1-NEXT: ld.param.b32 %r1, [t43_param_2]; +; O1-NEXT: cvt.u32.u16 %r2, %rs3; +; O1-NEXT: add.s32 %r3, %r2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NEXT: ret; +; +; O0-LABEL: t43( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t43_param_3]; +; O0-NEXT: ld.param.b32 %r1, [t43_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t43_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t43_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: st.b16 [%rd1], %rs3; +; O0-NEXT: cvt.u32.u16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r2, %r1; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + store i16 %mul, ptr %ptr + %zext = zext i16 %mul to i32 + %add = add i32 %zext, %c + ret i32 %add +} diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll index b44ae47..9338172d 100644 --- a/llvm/test/CodeGen/NVPTX/compare-int.ll +++ b/llvm/test/CodeGen/NVPTX/compare-int.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %} @@ -11,90 +12,180 @@ ;;; i64 define i64 @icmp_eq_i64(i64 %a, i64 %b) { -; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_eq_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_eq_i64_param_1]; +; CHECK-NEXT: setp.eq.b64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp eq i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ne_i64(i64 %a, i64 %b) { -; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ne_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ne_i64_param_1]; +; CHECK-NEXT: setp.ne.b64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ne i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ugt_i64(i64 %a, i64 %b) { -; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ugt_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ugt_i64_param_1]; +; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ugt i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_uge_i64(i64 %a, i64 %b) { -; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_uge_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_uge_i64_param_1]; +; CHECK-NEXT: setp.ge.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp uge i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ult_i64(i64 %a, i64 %b) { -; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ult_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ult_i64_param_1]; +; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ult i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ule_i64(i64 %a, i64 %b) { -; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ule_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ule_i64_param_1]; +; CHECK-NEXT: setp.le.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ule i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_sgt_i64(i64 %a, i64 %b) { -; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sgt_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sgt_i64_param_1]; +; CHECK-NEXT: setp.gt.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp sgt i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_sge_i64(i64 %a, i64 %b) { -; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sge_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sge_i64_param_1]; +; CHECK-NEXT: setp.ge.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp sge i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_slt_i64(i64 %a, i64 %b) { -; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_slt_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_slt_i64_param_1]; +; CHECK-NEXT: setp.lt.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp slt i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_sle_i64(i64 %a, i64 %b) { -; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sle_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sle_i64_param_1]; +; CHECK-NEXT: setp.le.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp sle i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret @@ -103,90 +194,180 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) { ;;; i32 define i32 @icmp_eq_i32(i32 %a, i32 %b) { -; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_eq_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_eq_i32_param_1]; +; CHECK-NEXT: setp.eq.b32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp eq i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ne_i32(i32 %a, i32 %b) { -; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ne_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ne_i32_param_1]; +; CHECK-NEXT: setp.ne.b32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ne i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ugt_i32(i32 %a, i32 %b) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ugt_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ugt_i32_param_1]; +; CHECK-NEXT: setp.gt.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ugt i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_uge_i32(i32 %a, i32 %b) { -; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_uge_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_uge_i32_param_1]; +; CHECK-NEXT: setp.ge.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp uge i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ult_i32(i32 %a, i32 %b) { -; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ult_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ult_i32_param_1]; +; CHECK-NEXT: setp.lt.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ult i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ule_i32(i32 %a, i32 %b) { -; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ule_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ule_i32_param_1]; +; CHECK-NEXT: setp.le.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ule i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_sgt_i32(i32 %a, i32 %b) { -; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_sgt_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_sgt_i32_param_1]; +; CHECK-NEXT: setp.gt.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp sgt i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_sge_i32(i32 %a, i32 %b) { -; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_sge_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_sge_i32_param_1]; +; CHECK-NEXT: setp.ge.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp sge i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_slt_i32(i32 %a, i32 %b) { -; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_slt_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_slt_i32_param_1]; +; CHECK-NEXT: setp.lt.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp slt i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_sle_i32(i32 %a, i32 %b) { -; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_sle_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_sle_i32_param_1]; +; CHECK-NEXT: setp.le.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp sle i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret @@ -196,90 +377,190 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) { ;;; i16 define i16 @icmp_eq_i16(i16 %a, i16 %b) { -; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_eq_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_eq_i16_param_1]; +; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp eq i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ne_i16(i16 %a, i16 %b) { -; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ne_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ne_i16_param_1]; +; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ne i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ugt_i16(i16 %a, i16 %b) { -; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ugt_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ugt_i16_param_1]; +; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ugt i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_uge_i16(i16 %a, i16 %b) { -; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_uge_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_uge_i16_param_1]; +; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp uge i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ult_i16(i16 %a, i16 %b) { -; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ult_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ult_i16_param_1]; +; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ult i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ule_i16(i16 %a, i16 %b) { -; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ule_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ule_i16_param_1]; +; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ule i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_sgt_i16(i16 %a, i16 %b) { -; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sgt_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sgt_i16_param_1]; +; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sgt i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_sge_i16(i16 %a, i16 %b) { -; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sge_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sge_i16_param_1]; +; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sge i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_slt_i16(i16 %a, i16 %b) { -; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_slt_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_slt_i16_param_1]; +; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp slt i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_sle_i16(i16 %a, i16 %b) { -; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sle_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sle_i16_param_1]; +; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sle i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret @@ -290,9 +571,19 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) { define i8 @icmp_eq_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_eq_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_eq_i8_param_1]; +; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp eq i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -300,9 +591,19 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) { define i8 @icmp_ne_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ne_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ne_i8_param_1]; +; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ne i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -310,9 +611,19 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) { define i8 @icmp_ugt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ugt_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ugt_i8_param_1]; +; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ugt i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -320,9 +631,19 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) { define i8 @icmp_uge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_uge_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_uge_i8_param_1]; +; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp uge i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -330,9 +651,19 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) { define i8 @icmp_ult_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ult_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ult_i8_param_1]; +; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ult i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -340,9 +671,19 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) { define i8 @icmp_ule_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ule_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ule_i8_param_1]; +; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ule i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -350,9 +691,19 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) { define i8 @icmp_sgt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sgt_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sgt_i8_param_1]; +; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sgt i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -360,9 +711,19 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) { define i8 @icmp_sge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sge_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sge_i8_param_1]; +; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sge i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -370,9 +731,19 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) { define i8 @icmp_slt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_slt_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_slt_i8_param_1]; +; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp slt i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -380,9 +751,19 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) { define i8 @icmp_sle_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sle_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sle_i8_param_1]; +; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sle i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll index d1b478d..48209a8 100644 --- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll +++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %} @@ -7,52 +8,203 @@ declare i64 @callee_variadic(ptr %p, ...); define %struct.64 @test_return_type_mismatch(ptr %p) { ; CHECK-LABEL: test_return_type_mismatch( -; CHECK: .param .align 1 .b8 retval0[8]; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<40>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .align 1 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0; +; CHECK-NEXT: mov.b64 %rd1, callee; +; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0; +; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: shl.b64 %rd13, %rd9, 8; +; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10; +; CHECK-NEXT: shl.b64 %rd16, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd18, %rd7, 24; +; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16; +; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14; +; CHECK-NEXT: shl.b64 %rd23, %rd5, 8; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6; +; CHECK-NEXT: shl.b64 %rd26, %rd4, 16; +; CHECK-NEXT: shl.b64 %rd28, %rd3, 24; +; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26; +; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24; +; CHECK-NEXT: shl.b64 %rd31, %rd30, 32; +; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20; +; CHECK-NEXT: st.param.b8 [func_retval0], %rd10; +; CHECK-NEXT: shr.u64 %rd33, %rd32, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33; +; CHECK-NEXT: shr.u64 %rd34, %rd32, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34; +; CHECK-NEXT: shr.u64 %rd35, %rd32, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35; +; CHECK-NEXT: shr.u64 %rd36, %rd32, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36; +; CHECK-NEXT: shr.u64 %rd37, %rd32, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37; +; CHECK-NEXT: shr.u64 %rd38, %rd32, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38; +; CHECK-NEXT: shr.u64 %rd39, %rd32, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39; +; CHECK-NEXT: ret; %ret = call %struct.64 @callee(ptr %p) ret %struct.64 %ret } define i64 @test_param_type_mismatch(ptr %p) { ; CHECK-LABEL: test_param_type_mismatch( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 retval0; ; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1; +; CHECK-NEXT: st.param.b64 [param0], 7; +; CHECK-NEXT: mov.b64 %rd1, callee; +; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_1; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %ret = call i64 @callee(i64 7) ret i64 %ret } define i64 @test_param_count_mismatch(ptr %p) { ; CHECK-LABEL: test_param_count_mismatch( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0]; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 param1; +; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2; +; CHECK-NEXT: st.param.b64 [param1], 7; +; CHECK-NEXT: mov.b64 %rd1, callee; +; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2; +; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %ret = call i64 @callee(ptr %p, i64 7) ret i64 %ret } define %struct.64 @test_return_type_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_return_type_mismatch_variadic( -; CHECK: .param .align 1 .b8 retval0[8]; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<40>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0]; +; CHECK-NEXT: { // callseq 3, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .align 1 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3; +; CHECK-NEXT: mov.b64 %rd1, callee_variadic; +; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3; +; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0]; +; CHECK-NEXT: } // callseq 3 +; CHECK-NEXT: shl.b64 %rd13, %rd9, 8; +; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10; +; CHECK-NEXT: shl.b64 %rd16, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd18, %rd7, 24; +; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16; +; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14; +; CHECK-NEXT: shl.b64 %rd23, %rd5, 8; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6; +; CHECK-NEXT: shl.b64 %rd26, %rd4, 16; +; CHECK-NEXT: shl.b64 %rd28, %rd3, 24; +; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26; +; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24; +; CHECK-NEXT: shl.b64 %rd31, %rd30, 32; +; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20; +; CHECK-NEXT: st.param.b8 [func_retval0], %rd10; +; CHECK-NEXT: shr.u64 %rd33, %rd32, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33; +; CHECK-NEXT: shr.u64 %rd34, %rd32, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34; +; CHECK-NEXT: shr.u64 %rd35, %rd32, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35; +; CHECK-NEXT: shr.u64 %rd36, %rd32, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36; +; CHECK-NEXT: shr.u64 %rd37, %rd32, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37; +; CHECK-NEXT: shr.u64 %rd38, %rd32, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38; +; CHECK-NEXT: shr.u64 %rd39, %rd32, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39; +; CHECK-NEXT: ret; %ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p) ret %struct.64 %ret } define i64 @test_param_type_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_param_type_mismatch_variadic( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_param_type_mismatch_variadic_param_0]; +; CHECK-NEXT: { // callseq 4, 0 +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: st.param.b64 [param1], 7; ; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 4 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7) ret i64 %ret } define i64 @test_param_count_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_param_count_mismatch_variadic( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_variadic_param_0]; +; CHECK-NEXT: { // callseq 5, 0 +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: st.param.b64 [param1], 7; ; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 5 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7) ret i64 %ret } diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll index 4d2ba7d..06fb8d2 100644 --- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll +++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll @@ -22,8 +22,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) { ; CHECK-32-NEXT: cvta.local.u32 %r5, %r4; ; CHECK-32-NEXT: { // callseq 0, 0 ; CHECK-32-NEXT: .param .b32 param0; -; CHECK-32-NEXT: st.param.b32 [param0], %r5; ; CHECK-32-NEXT: .param .b32 retval0; +; CHECK-32-NEXT: st.param.b32 [param0], %r5; ; CHECK-32-NEXT: call.uni (retval0), bar, (param0); ; CHECK-32-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-32-NEXT: } // callseq 0 @@ -43,8 +43,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) { ; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4; ; CHECK-64-NEXT: { // callseq 0, 0 ; CHECK-64-NEXT: .param .b64 param0; -; CHECK-64-NEXT: st.param.b64 [param0], %rd5; ; CHECK-64-NEXT: .param .b32 retval0; +; CHECK-64-NEXT: st.param.b64 [param0], %rd5; ; CHECK-64-NEXT: call.uni (retval0), bar, (param0); ; CHECK-64-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-64-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll index 80980ef..d61a63c 100644 --- a/llvm/test/CodeGen/NVPTX/extractelement.ll +++ b/llvm/test/CodeGen/NVPTX/extractelement.ll @@ -56,23 +56,22 @@ define i16 @test_v4i8(i32 %a) { ; CHECK-LABEL: test_v4i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<8>; -; CHECK-NEXT: .reg .b32 %r<7>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_v4i8_param_0]; -; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x8880U; -; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; -; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x9991U; -; CHECK-NEXT: cvt.u16.u32 %rs2, %r3; -; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xaaa2U; -; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; -; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0xbbb3U; -; CHECK-NEXT: cvt.u16.u32 %rs4, %r5; +; CHECK-NEXT: cvt.s8.s32 %rs1, %r1; +; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x9991U; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r2; +; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0xaaa2U; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r3; +; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xbbb3U; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r4; ; CHECK-NEXT: add.s16 %rs5, %rs1, %rs2; ; CHECK-NEXT: add.s16 %rs6, %rs3, %rs4; ; CHECK-NEXT: add.s16 %rs7, %rs5, %rs6; -; CHECK-NEXT: cvt.u32.u16 %r6, %rs7; -; CHECK-NEXT: st.param.b32 [func_retval0], %r6; +; CHECK-NEXT: cvt.u32.u16 %r5, %rs7; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; ; CHECK-NEXT: ret; %v = bitcast i32 %a to <4 x i8> %r0 = extractelement <4 x i8> %v, i64 0 @@ -96,7 +95,7 @@ define i32 @test_v4i8_s32(i32 %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_v4i8_s32_param_0]; -; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x8880U; +; CHECK-NEXT: cvt.s32.s8 %r2, %r1; ; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x9991U; ; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xaaa2U; ; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0xbbb3U; @@ -127,12 +126,12 @@ define i32 @test_v4i8_u32(i32 %a) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_v4i8_u32_param_0]; -; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7770U; -; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x7771U; -; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7772U; -; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0x7773U; -; CHECK-NEXT: add.s32 %r6, %r2, %r3; -; CHECK-NEXT: add.s32 %r7, %r4, %r5; +; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0x7771U; +; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x7772U; +; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x7773U; +; CHECK-NEXT: and.b32 %r5, %r1, 255; +; CHECK-NEXT: add.s32 %r6, %r5, %r2; +; CHECK-NEXT: add.s32 %r7, %r3, %r4; ; CHECK-NEXT: add.s32 %r8, %r6, %r7; ; CHECK-NEXT: st.param.b32 [func_retval0], %r8; ; CHECK-NEXT: ret; @@ -157,26 +156,24 @@ define i16 @test_v8i8(i64 %a) { ; CHECK-LABEL: test_v8i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<16>; -; CHECK-NEXT: .reg .b32 %r<12>; +; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v8i8_param_0]; -; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x8880U; -; CHECK-NEXT: cvt.u16.u32 %rs1, %r3; -; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0x9991U; -; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; -; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0xaaa2U; -; CHECK-NEXT: cvt.u16.u32 %rs3, %r5; -; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0xbbb3U; -; CHECK-NEXT: cvt.u16.u32 %rs4, %r6; -; CHECK-NEXT: prmt.b32 %r7, %r2, 0, 0x8880U; -; CHECK-NEXT: cvt.u16.u32 %rs5, %r7; -; CHECK-NEXT: prmt.b32 %r8, %r2, 0, 0x9991U; -; CHECK-NEXT: cvt.u16.u32 %rs6, %r8; -; CHECK-NEXT: prmt.b32 %r9, %r2, 0, 0xaaa2U; -; CHECK-NEXT: cvt.u16.u32 %rs7, %r9; -; CHECK-NEXT: prmt.b32 %r10, %r2, 0, 0xbbb3U; -; CHECK-NEXT: cvt.u16.u32 %rs8, %r10; +; CHECK-NEXT: cvt.s8.s32 %rs1, %r1; +; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0x9991U; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r3; +; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xaaa2U; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r4; +; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0xbbb3U; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r5; +; CHECK-NEXT: cvt.s8.s32 %rs5, %r2; +; CHECK-NEXT: prmt.b32 %r6, %r2, 0, 0x9991U; +; CHECK-NEXT: cvt.u16.u32 %rs6, %r6; +; CHECK-NEXT: prmt.b32 %r7, %r2, 0, 0xaaa2U; +; CHECK-NEXT: cvt.u16.u32 %rs7, %r7; +; CHECK-NEXT: prmt.b32 %r8, %r2, 0, 0xbbb3U; +; CHECK-NEXT: cvt.u16.u32 %rs8, %r8; ; CHECK-NEXT: add.s16 %rs9, %rs1, %rs2; ; CHECK-NEXT: add.s16 %rs10, %rs3, %rs4; ; CHECK-NEXT: add.s16 %rs11, %rs5, %rs6; @@ -184,8 +181,8 @@ define i16 @test_v8i8(i64 %a) { ; CHECK-NEXT: add.s16 %rs13, %rs9, %rs10; ; CHECK-NEXT: add.s16 %rs14, %rs11, %rs12; ; CHECK-NEXT: add.s16 %rs15, %rs13, %rs14; -; CHECK-NEXT: cvt.u32.u16 %r11, %rs15; -; CHECK-NEXT: st.param.b32 [func_retval0], %r11; +; CHECK-NEXT: cvt.u32.u16 %r9, %rs15; +; CHECK-NEXT: st.param.b32 [func_retval0], %r9; ; CHECK-NEXT: ret; %v = bitcast i64 %a to <8 x i8> %r0 = extractelement <8 x i8> %v, i64 0 diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index 8918fbd..d4fcea3 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -462,10 +462,10 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: st.param.b32 [param1], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -485,10 +485,10 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: st.param.b32 [param1], %r1; +; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 1 @@ -508,10 +508,10 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: st.param.b32 [param1], %r1; +; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 2 diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index 30afd69..b84a0ec 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -859,10 +859,10 @@ define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: ld.param.b64 %rd1, [test_call_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: .param .align 8 .b8 param1[8]; -; CHECK-NEXT: st.param.b64 [param1], %rd2; ; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param1], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -882,10 +882,10 @@ define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: .param .align 8 .b8 param1[8]; -; CHECK-NEXT: st.param.b64 [param1], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param1], %rd1; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; ; CHECK-NEXT: } // callseq 1 @@ -905,10 +905,10 @@ define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: .param .align 8 .b8 param1[8]; -; CHECK-NEXT: st.param.b64 [param1], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param1], %rd1; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; ; CHECK-NEXT: } // callseq 2 diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll index 5aa12b0..87274aa 100644 --- a/llvm/test/CodeGen/NVPTX/fma.ll +++ b/llvm/test/CodeGen/NVPTX/fma.ll @@ -36,10 +36,10 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { ; CHECK-NEXT: fma.rn.f32 %r6, %r1, %r2, %r5; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b32 param0; -; CHECK-NEXT: st.param.b32 [param0], %r4; ; CHECK-NEXT: .param .b32 param1; -; CHECK-NEXT: st.param.b32 [param1], %r6; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.b32 [param1], %r6; +; CHECK-NEXT: st.param.b32 [param0], %r4; ; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r7, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -83,10 +83,10 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { ; CHECK-NEXT: fma.rn.f64 %rd6, %rd1, %rd2, %rd5; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], %rd4; ; CHECK-NEXT: .param .b64 param1; -; CHECK-NEXT: st.param.b64 [param1], %rd6; ; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param1], %rd6; +; CHECK-NEXT: st.param.b64 [param0], %rd4; ; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1); ; CHECK-NEXT: ld.param.b64 %rd7, [retval0]; ; CHECK-NEXT: } // callseq 1 diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll index ed8f6b4..636e12b 100644 --- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll +++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll @@ -64,9 +64,9 @@ define void @test_ld_param_byval(ptr byval(i32) %a) { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; +; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0]; ; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni byval_user, (param0); ; CHECK-NEXT: } // callseq 1 diff --git a/llvm/test/CodeGen/NVPTX/i1-select.ll b/llvm/test/CodeGen/NVPTX/i1-select.ll index f1adc34..9a051b3 100644 --- a/llvm/test/CodeGen/NVPTX/i1-select.ll +++ b/llvm/test/CodeGen/NVPTX/i1-select.ll @@ -94,27 +94,27 @@ define i32 @test_select_i1_basic(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %fals define i32 @test_select_i1_basic_folding(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %false) { ; CHECK-LABEL: test_select_i1_basic_folding( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<12>; -; CHECK-NEXT: .reg .b32 %r<9>; +; CHECK-NEXT: .reg .pred %p<13>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_select_i1_basic_folding_param_0]; ; CHECK-NEXT: setp.eq.b32 %p1, %r1, 0; -; CHECK-NEXT: ld.param.b32 %r3, [test_select_i1_basic_folding_param_1]; -; CHECK-NEXT: setp.ne.b32 %p2, %r3, 0; -; CHECK-NEXT: setp.eq.b32 %p3, %r3, 0; -; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_folding_param_2]; -; CHECK-NEXT: setp.eq.b32 %p4, %r5, 0; -; CHECK-NEXT: ld.param.b32 %r6, [test_select_i1_basic_folding_param_3]; +; CHECK-NEXT: ld.param.b32 %r2, [test_select_i1_basic_folding_param_1]; +; CHECK-NEXT: setp.ne.b32 %p2, %r2, 0; +; CHECK-NEXT: setp.eq.b32 %p3, %r2, 0; +; CHECK-NEXT: ld.param.b32 %r3, [test_select_i1_basic_folding_param_2]; +; CHECK-NEXT: setp.eq.b32 %p4, %r3, 0; +; CHECK-NEXT: ld.param.b32 %r4, [test_select_i1_basic_folding_param_3]; ; CHECK-NEXT: xor.pred %p6, %p1, %p3; -; CHECK-NEXT: ld.param.b32 %r7, [test_select_i1_basic_folding_param_4]; +; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_folding_param_4]; ; CHECK-NEXT: and.pred %p7, %p6, %p4; -; CHECK-NEXT: and.pred %p8, %p2, %p4; -; CHECK-NEXT: and.pred %p9, %p3, %p7; -; CHECK-NEXT: or.pred %p10, %p9, %p8; -; CHECK-NEXT: xor.pred %p11, %p10, %p3; -; CHECK-NEXT: selp.b32 %r8, %r6, %r7, %p11; -; CHECK-NEXT: st.param.b32 [func_retval0], %r8; +; CHECK-NEXT: and.pred %p9, %p2, %p4; +; CHECK-NEXT: and.pred %p10, %p3, %p7; +; CHECK-NEXT: or.pred %p11, %p10, %p9; +; CHECK-NEXT: xor.pred %p12, %p11, %p3; +; CHECK-NEXT: selp.b32 %r6, %r4, %r5, %p12; +; CHECK-NEXT: st.param.b32 [func_retval0], %r6; ; CHECK-NEXT: ret; %b1 = icmp eq i32 %v1, 0 %b2 = icmp eq i32 %v2, 0 diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll index 4f4c2fe..79abca0 100644 --- a/llvm/test/CodeGen/NVPTX/i128-param.ll +++ b/llvm/test/CodeGen/NVPTX/i128-param.ll @@ -29,11 +29,11 @@ start: ; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1]; ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 - ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK-NEXT: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]} - ; CHECK: .param .align 16 .b8 param1[16]; - ; CHECK-NEXT: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]} - ; CHECK: } // callseq [[CALLSEQ_ID]] + ; CHECK-DAG: .param .align 16 .b8 param0[16]; + ; CHECK-DAG: .param .align 16 .b8 param1[16]; + ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]} + ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]} + ; CHECK: } // callseq [[CALLSEQ_ID]] call void @callee(i128 %0, i128 %1, ptr %2) ret void @@ -48,11 +48,11 @@ start: ; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1] ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0 - ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]} - ; CHECK: .param .align 16 .b8 param1[16]; - ; CHECK: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]} - ; CHECK: } // callseq [[CALLSEQ_ID]] + ; CHECK-DAG: .param .align 16 .b8 param0[16]; + ; CHECK-DAG: .param .align 16 .b8 param1[16]; + ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]} + ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]} + ; CHECK: } // callseq [[CALLSEQ_ID]] call void @callee(i128 %0, i128 %1, ptr %2) ret void diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll index f2211eb..44d8558 100644 --- a/llvm/test/CodeGen/NVPTX/i128.ll +++ b/llvm/test/CodeGen/NVPTX/i128.ll @@ -5,9 +5,9 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-LABEL: srem_i128( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<22>; +; CHECK-NEXT: .reg .pred %p<20>; ; CHECK-NEXT: .reg .b32 %r<12>; -; CHECK-NEXT: .reg .b64 %rd<126>; +; CHECK-NEXT: .reg .b64 %rd<127>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases ; CHECK-NEXT: ld.param.v2.b64 {%rd45, %rd46}, [srem_i128_param_0]; @@ -42,103 +42,102 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: cvt.u64.u32 %rd62, %r4; ; CHECK-NEXT: add.s64 %rd63, %rd62, 64; ; CHECK-NEXT: selp.b64 %rd64, %rd61, %rd63, %p7; -; CHECK-NEXT: mov.b64 %rd116, 0; +; CHECK-NEXT: mov.b64 %rd117, 0; ; CHECK-NEXT: sub.cc.s64 %rd66, %rd60, %rd64; -; CHECK-NEXT: subc.cc.s64 %rd8, %rd116, 0; -; CHECK-NEXT: setp.ne.b64 %p8, %rd8, 0; -; CHECK-NEXT: and.pred %p10, %p8, %p8; -; CHECK-NEXT: setp.eq.b64 %p11, %rd8, 0; -; CHECK-NEXT: setp.gt.u64 %p12, %rd66, 127; -; CHECK-NEXT: and.pred %p13, %p11, %p12; -; CHECK-NEXT: or.pred %p14, %p13, %p10; -; CHECK-NEXT: or.pred %p15, %p5, %p14; -; CHECK-NEXT: xor.b64 %rd67, %rd66, 127; -; CHECK-NEXT: or.b64 %rd68, %rd67, %rd8; -; CHECK-NEXT: setp.eq.b64 %p16, %rd68, 0; -; CHECK-NEXT: selp.b64 %rd125, 0, %rd4, %p15; -; CHECK-NEXT: selp.b64 %rd124, 0, %rd3, %p15; -; CHECK-NEXT: or.pred %p17, %p15, %p16; -; CHECK-NEXT: @%p17 bra $L__BB0_5; +; CHECK-NEXT: subc.cc.s64 %rd67, %rd117, 0; +; CHECK-NEXT: setp.gt.u64 %p8, %rd66, 127; +; CHECK-NEXT: setp.eq.b64 %p9, %rd67, 0; +; CHECK-NEXT: and.pred %p10, %p9, %p8; +; CHECK-NEXT: setp.ne.b64 %p11, %rd67, 0; +; CHECK-NEXT: or.pred %p12, %p10, %p11; +; CHECK-NEXT: or.pred %p13, %p5, %p12; +; CHECK-NEXT: xor.b64 %rd68, %rd66, 127; +; CHECK-NEXT: or.b64 %rd69, %rd68, %rd67; +; CHECK-NEXT: setp.eq.b64 %p14, %rd69, 0; +; CHECK-NEXT: selp.b64 %rd126, 0, %rd4, %p13; +; CHECK-NEXT: selp.b64 %rd125, 0, %rd3, %p13; +; CHECK-NEXT: or.pred %p15, %p13, %p14; +; CHECK-NEXT: @%p15 bra $L__BB0_5; ; CHECK-NEXT: // %bb.3: // %udiv-bb1 -; CHECK-NEXT: add.cc.s64 %rd118, %rd66, 1; -; CHECK-NEXT: addc.cc.s64 %rd119, %rd8, 0; -; CHECK-NEXT: or.b64 %rd71, %rd118, %rd119; -; CHECK-NEXT: setp.eq.b64 %p18, %rd71, 0; +; CHECK-NEXT: add.cc.s64 %rd119, %rd66, 1; +; CHECK-NEXT: addc.cc.s64 %rd120, %rd67, 0; +; CHECK-NEXT: or.b64 %rd72, %rd119, %rd120; +; CHECK-NEXT: setp.eq.b64 %p16, %rd72, 0; ; CHECK-NEXT: cvt.u32.u64 %r5, %rd66; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; -; CHECK-NEXT: shl.b64 %rd72, %rd4, %r6; +; CHECK-NEXT: shl.b64 %rd73, %rd4, %r6; ; CHECK-NEXT: sub.s32 %r7, 64, %r6; -; CHECK-NEXT: shr.u64 %rd73, %rd3, %r7; -; CHECK-NEXT: or.b64 %rd74, %rd72, %rd73; +; CHECK-NEXT: shr.u64 %rd74, %rd3, %r7; +; CHECK-NEXT: or.b64 %rd75, %rd73, %rd74; ; CHECK-NEXT: sub.s32 %r8, 63, %r5; -; CHECK-NEXT: shl.b64 %rd75, %rd3, %r8; -; CHECK-NEXT: setp.gt.s32 %p19, %r6, 63; -; CHECK-NEXT: selp.b64 %rd123, %rd75, %rd74, %p19; -; CHECK-NEXT: shl.b64 %rd122, %rd3, %r6; -; CHECK-NEXT: mov.b64 %rd113, %rd116; -; CHECK-NEXT: @%p18 bra $L__BB0_4; +; CHECK-NEXT: shl.b64 %rd76, %rd3, %r8; +; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63; +; CHECK-NEXT: selp.b64 %rd124, %rd76, %rd75, %p17; +; CHECK-NEXT: shl.b64 %rd123, %rd3, %r6; +; CHECK-NEXT: mov.b64 %rd114, %rd117; +; CHECK-NEXT: @%p16 bra $L__BB0_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader -; CHECK-NEXT: cvt.u32.u64 %r9, %rd118; -; CHECK-NEXT: shr.u64 %rd78, %rd3, %r9; +; CHECK-NEXT: cvt.u32.u64 %r9, %rd119; +; CHECK-NEXT: shr.u64 %rd79, %rd3, %r9; ; CHECK-NEXT: sub.s32 %r10, 64, %r9; -; CHECK-NEXT: shl.b64 %rd79, %rd4, %r10; -; CHECK-NEXT: or.b64 %rd80, %rd78, %rd79; +; CHECK-NEXT: shl.b64 %rd80, %rd4, %r10; +; CHECK-NEXT: or.b64 %rd81, %rd79, %rd80; ; CHECK-NEXT: add.s32 %r11, %r9, -64; -; CHECK-NEXT: shr.u64 %rd81, %rd4, %r11; -; CHECK-NEXT: setp.gt.s32 %p20, %r9, 63; -; CHECK-NEXT: selp.b64 %rd120, %rd81, %rd80, %p20; -; CHECK-NEXT: shr.u64 %rd121, %rd4, %r9; +; CHECK-NEXT: shr.u64 %rd82, %rd4, %r11; +; CHECK-NEXT: setp.gt.s32 %p18, %r9, 63; +; CHECK-NEXT: selp.b64 %rd121, %rd82, %rd81, %p18; +; CHECK-NEXT: shr.u64 %rd122, %rd4, %r9; ; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1; ; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1; -; CHECK-NEXT: mov.b64 %rd113, 0; -; CHECK-NEXT: mov.b64 %rd116, %rd113; +; CHECK-NEXT: mov.b64 %rd114, 0; +; CHECK-NEXT: mov.b64 %rd117, %rd114; ; CHECK-NEXT: $L__BB0_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: shr.u64 %rd82, %rd120, 63; -; CHECK-NEXT: shl.b64 %rd83, %rd121, 1; -; CHECK-NEXT: or.b64 %rd84, %rd83, %rd82; -; CHECK-NEXT: shl.b64 %rd85, %rd120, 1; -; CHECK-NEXT: shr.u64 %rd86, %rd123, 63; -; CHECK-NEXT: or.b64 %rd87, %rd85, %rd86; -; CHECK-NEXT: shr.u64 %rd88, %rd122, 63; -; CHECK-NEXT: shl.b64 %rd89, %rd123, 1; -; CHECK-NEXT: or.b64 %rd90, %rd89, %rd88; -; CHECK-NEXT: shl.b64 %rd91, %rd122, 1; -; CHECK-NEXT: or.b64 %rd122, %rd116, %rd91; -; CHECK-NEXT: or.b64 %rd123, %rd113, %rd90; -; CHECK-NEXT: sub.cc.s64 %rd92, %rd35, %rd87; -; CHECK-NEXT: subc.cc.s64 %rd93, %rd36, %rd84; -; CHECK-NEXT: shr.s64 %rd94, %rd93, 63; -; CHECK-NEXT: and.b64 %rd116, %rd94, 1; -; CHECK-NEXT: and.b64 %rd95, %rd94, %rd5; -; CHECK-NEXT: and.b64 %rd96, %rd94, %rd6; -; CHECK-NEXT: sub.cc.s64 %rd120, %rd87, %rd95; -; CHECK-NEXT: subc.cc.s64 %rd121, %rd84, %rd96; -; CHECK-NEXT: add.cc.s64 %rd118, %rd118, -1; -; CHECK-NEXT: addc.cc.s64 %rd119, %rd119, -1; -; CHECK-NEXT: or.b64 %rd97, %rd118, %rd119; -; CHECK-NEXT: setp.eq.b64 %p21, %rd97, 0; -; CHECK-NEXT: @%p21 bra $L__BB0_4; +; CHECK-NEXT: shr.u64 %rd83, %rd121, 63; +; CHECK-NEXT: shl.b64 %rd84, %rd122, 1; +; CHECK-NEXT: or.b64 %rd85, %rd84, %rd83; +; CHECK-NEXT: shl.b64 %rd86, %rd121, 1; +; CHECK-NEXT: shr.u64 %rd87, %rd124, 63; +; CHECK-NEXT: or.b64 %rd88, %rd86, %rd87; +; CHECK-NEXT: shr.u64 %rd89, %rd123, 63; +; CHECK-NEXT: shl.b64 %rd90, %rd124, 1; +; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; +; CHECK-NEXT: shl.b64 %rd92, %rd123, 1; +; CHECK-NEXT: or.b64 %rd123, %rd117, %rd92; +; CHECK-NEXT: or.b64 %rd124, %rd114, %rd91; +; CHECK-NEXT: sub.cc.s64 %rd93, %rd35, %rd88; +; CHECK-NEXT: subc.cc.s64 %rd94, %rd36, %rd85; +; CHECK-NEXT: shr.s64 %rd95, %rd94, 63; +; CHECK-NEXT: and.b64 %rd117, %rd95, 1; +; CHECK-NEXT: and.b64 %rd96, %rd95, %rd5; +; CHECK-NEXT: and.b64 %rd97, %rd95, %rd6; +; CHECK-NEXT: sub.cc.s64 %rd121, %rd88, %rd96; +; CHECK-NEXT: subc.cc.s64 %rd122, %rd85, %rd97; +; CHECK-NEXT: add.cc.s64 %rd119, %rd119, -1; +; CHECK-NEXT: addc.cc.s64 %rd120, %rd120, -1; +; CHECK-NEXT: or.b64 %rd98, %rd119, %rd120; +; CHECK-NEXT: setp.eq.b64 %p19, %rd98, 0; +; CHECK-NEXT: @%p19 bra $L__BB0_4; ; CHECK-NEXT: bra.uni $L__BB0_2; ; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit -; CHECK-NEXT: shr.u64 %rd98, %rd122, 63; -; CHECK-NEXT: shl.b64 %rd99, %rd123, 1; -; CHECK-NEXT: or.b64 %rd100, %rd99, %rd98; -; CHECK-NEXT: shl.b64 %rd101, %rd122, 1; -; CHECK-NEXT: or.b64 %rd124, %rd116, %rd101; -; CHECK-NEXT: or.b64 %rd125, %rd113, %rd100; +; CHECK-NEXT: shr.u64 %rd99, %rd123, 63; +; CHECK-NEXT: shl.b64 %rd100, %rd124, 1; +; CHECK-NEXT: or.b64 %rd101, %rd100, %rd99; +; CHECK-NEXT: shl.b64 %rd102, %rd123, 1; +; CHECK-NEXT: or.b64 %rd125, %rd117, %rd102; +; CHECK-NEXT: or.b64 %rd126, %rd114, %rd101; ; CHECK-NEXT: $L__BB0_5: // %udiv-end -; CHECK-NEXT: mul.hi.u64 %rd102, %rd5, %rd124; -; CHECK-NEXT: mad.lo.s64 %rd103, %rd5, %rd125, %rd102; -; CHECK-NEXT: mad.lo.s64 %rd104, %rd6, %rd124, %rd103; -; CHECK-NEXT: mul.lo.s64 %rd105, %rd5, %rd124; -; CHECK-NEXT: sub.cc.s64 %rd106, %rd3, %rd105; -; CHECK-NEXT: subc.cc.s64 %rd107, %rd4, %rd104; -; CHECK-NEXT: xor.b64 %rd108, %rd106, %rd2; +; CHECK-NEXT: mul.hi.u64 %rd103, %rd5, %rd125; +; CHECK-NEXT: mad.lo.s64 %rd104, %rd5, %rd126, %rd103; +; CHECK-NEXT: mad.lo.s64 %rd105, %rd6, %rd125, %rd104; +; CHECK-NEXT: mul.lo.s64 %rd106, %rd5, %rd125; +; CHECK-NEXT: sub.cc.s64 %rd107, %rd3, %rd106; +; CHECK-NEXT: subc.cc.s64 %rd108, %rd4, %rd105; ; CHECK-NEXT: xor.b64 %rd109, %rd107, %rd2; -; CHECK-NEXT: sub.cc.s64 %rd110, %rd108, %rd2; -; CHECK-NEXT: subc.cc.s64 %rd111, %rd109, %rd2; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd110, %rd111}; +; CHECK-NEXT: xor.b64 %rd110, %rd108, %rd2; +; CHECK-NEXT: sub.cc.s64 %rd111, %rd109, %rd2; +; CHECK-NEXT: subc.cc.s64 %rd112, %rd110, %rd2; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd111, %rd112}; ; CHECK-NEXT: ret; %div = srem i128 %lhs, %rhs ret i128 %div @@ -149,7 +148,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK: { ; CHECK-NEXT: .reg .pred %p<18>; ; CHECK-NEXT: .reg .b32 %r<12>; -; CHECK-NEXT: .reg .b64 %rd<111>; +; CHECK-NEXT: .reg .b64 %rd<113>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases ; CHECK-NEXT: ld.param.v2.b64 {%rd41, %rd42}, [urem_i128_param_0]; @@ -173,98 +172,98 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: cvt.u64.u32 %rd52, %r4; ; CHECK-NEXT: add.s64 %rd53, %rd52, 64; ; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5; -; CHECK-NEXT: mov.b64 %rd101, 0; -; CHECK-NEXT: sub.cc.s64 %rd5, %rd50, %rd54; -; CHECK-NEXT: subc.cc.s64 %rd6, %rd101, 0; -; CHECK-NEXT: setp.gt.u64 %p6, %rd5, 127; -; CHECK-NEXT: setp.eq.b64 %p7, %rd6, 0; +; CHECK-NEXT: mov.b64 %rd103, 0; +; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; +; CHECK-NEXT: subc.cc.s64 %rd57, %rd103, 0; +; CHECK-NEXT: setp.gt.u64 %p6, %rd56, 127; +; CHECK-NEXT: setp.eq.b64 %p7, %rd57, 0; ; CHECK-NEXT: and.pred %p8, %p7, %p6; -; CHECK-NEXT: setp.ne.b64 %p9, %rd6, 0; +; CHECK-NEXT: setp.ne.b64 %p9, %rd57, 0; ; CHECK-NEXT: or.pred %p10, %p8, %p9; ; CHECK-NEXT: or.pred %p11, %p3, %p10; -; CHECK-NEXT: xor.b64 %rd56, %rd5, 127; -; CHECK-NEXT: or.b64 %rd57, %rd56, %rd6; -; CHECK-NEXT: setp.eq.b64 %p12, %rd57, 0; -; CHECK-NEXT: selp.b64 %rd110, 0, %rd42, %p11; -; CHECK-NEXT: selp.b64 %rd109, 0, %rd41, %p11; +; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; +; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; +; CHECK-NEXT: setp.eq.b64 %p12, %rd59, 0; +; CHECK-NEXT: selp.b64 %rd112, 0, %rd42, %p11; +; CHECK-NEXT: selp.b64 %rd111, 0, %rd41, %p11; ; CHECK-NEXT: or.pred %p13, %p11, %p12; ; CHECK-NEXT: @%p13 bra $L__BB1_5; ; CHECK-NEXT: // %bb.3: // %udiv-bb1 -; CHECK-NEXT: add.cc.s64 %rd103, %rd5, 1; -; CHECK-NEXT: addc.cc.s64 %rd104, %rd6, 0; -; CHECK-NEXT: or.b64 %rd60, %rd103, %rd104; -; CHECK-NEXT: setp.eq.b64 %p14, %rd60, 0; -; CHECK-NEXT: cvt.u32.u64 %r5, %rd5; +; CHECK-NEXT: add.cc.s64 %rd105, %rd56, 1; +; CHECK-NEXT: addc.cc.s64 %rd106, %rd57, 0; +; CHECK-NEXT: or.b64 %rd62, %rd105, %rd106; +; CHECK-NEXT: setp.eq.b64 %p14, %rd62, 0; +; CHECK-NEXT: cvt.u32.u64 %r5, %rd56; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; -; CHECK-NEXT: shl.b64 %rd61, %rd42, %r6; +; CHECK-NEXT: shl.b64 %rd63, %rd42, %r6; ; CHECK-NEXT: sub.s32 %r7, 64, %r6; -; CHECK-NEXT: shr.u64 %rd62, %rd41, %r7; -; CHECK-NEXT: or.b64 %rd63, %rd61, %rd62; +; CHECK-NEXT: shr.u64 %rd64, %rd41, %r7; +; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64; ; CHECK-NEXT: sub.s32 %r8, 63, %r5; -; CHECK-NEXT: shl.b64 %rd64, %rd41, %r8; +; CHECK-NEXT: shl.b64 %rd66, %rd41, %r8; ; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63; -; CHECK-NEXT: selp.b64 %rd108, %rd64, %rd63, %p15; -; CHECK-NEXT: shl.b64 %rd107, %rd41, %r6; -; CHECK-NEXT: mov.b64 %rd98, %rd101; +; CHECK-NEXT: selp.b64 %rd110, %rd66, %rd65, %p15; +; CHECK-NEXT: shl.b64 %rd109, %rd41, %r6; +; CHECK-NEXT: mov.b64 %rd100, %rd103; ; CHECK-NEXT: @%p14 bra $L__BB1_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader -; CHECK-NEXT: cvt.u32.u64 %r9, %rd103; -; CHECK-NEXT: shr.u64 %rd67, %rd41, %r9; +; CHECK-NEXT: cvt.u32.u64 %r9, %rd105; +; CHECK-NEXT: shr.u64 %rd69, %rd41, %r9; ; CHECK-NEXT: sub.s32 %r10, 64, %r9; -; CHECK-NEXT: shl.b64 %rd68, %rd42, %r10; -; CHECK-NEXT: or.b64 %rd69, %rd67, %rd68; +; CHECK-NEXT: shl.b64 %rd70, %rd42, %r10; +; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70; ; CHECK-NEXT: add.s32 %r11, %r9, -64; -; CHECK-NEXT: shr.u64 %rd70, %rd42, %r11; +; CHECK-NEXT: shr.u64 %rd72, %rd42, %r11; ; CHECK-NEXT: setp.gt.s32 %p16, %r9, 63; -; CHECK-NEXT: selp.b64 %rd105, %rd70, %rd69, %p16; -; CHECK-NEXT: shr.u64 %rd106, %rd42, %r9; +; CHECK-NEXT: selp.b64 %rd107, %rd72, %rd71, %p16; +; CHECK-NEXT: shr.u64 %rd108, %rd42, %r9; ; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1; ; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1; -; CHECK-NEXT: mov.b64 %rd98, 0; -; CHECK-NEXT: mov.b64 %rd101, %rd98; +; CHECK-NEXT: mov.b64 %rd100, 0; +; CHECK-NEXT: mov.b64 %rd103, %rd100; ; CHECK-NEXT: $L__BB1_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: shr.u64 %rd71, %rd105, 63; -; CHECK-NEXT: shl.b64 %rd72, %rd106, 1; -; CHECK-NEXT: or.b64 %rd73, %rd72, %rd71; -; CHECK-NEXT: shl.b64 %rd74, %rd105, 1; -; CHECK-NEXT: shr.u64 %rd75, %rd108, 63; -; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75; -; CHECK-NEXT: shr.u64 %rd77, %rd107, 63; -; CHECK-NEXT: shl.b64 %rd78, %rd108, 1; -; CHECK-NEXT: or.b64 %rd79, %rd78, %rd77; -; CHECK-NEXT: shl.b64 %rd80, %rd107, 1; -; CHECK-NEXT: or.b64 %rd107, %rd101, %rd80; -; CHECK-NEXT: or.b64 %rd108, %rd98, %rd79; -; CHECK-NEXT: sub.cc.s64 %rd81, %rd33, %rd76; -; CHECK-NEXT: subc.cc.s64 %rd82, %rd34, %rd73; -; CHECK-NEXT: shr.s64 %rd83, %rd82, 63; -; CHECK-NEXT: and.b64 %rd101, %rd83, 1; -; CHECK-NEXT: and.b64 %rd84, %rd83, %rd3; -; CHECK-NEXT: and.b64 %rd85, %rd83, %rd4; -; CHECK-NEXT: sub.cc.s64 %rd105, %rd76, %rd84; -; CHECK-NEXT: subc.cc.s64 %rd106, %rd73, %rd85; -; CHECK-NEXT: add.cc.s64 %rd103, %rd103, -1; -; CHECK-NEXT: addc.cc.s64 %rd104, %rd104, -1; -; CHECK-NEXT: or.b64 %rd86, %rd103, %rd104; -; CHECK-NEXT: setp.eq.b64 %p17, %rd86, 0; +; CHECK-NEXT: shr.u64 %rd73, %rd107, 63; +; CHECK-NEXT: shl.b64 %rd74, %rd108, 1; +; CHECK-NEXT: or.b64 %rd75, %rd74, %rd73; +; CHECK-NEXT: shl.b64 %rd76, %rd107, 1; +; CHECK-NEXT: shr.u64 %rd77, %rd110, 63; +; CHECK-NEXT: or.b64 %rd78, %rd76, %rd77; +; CHECK-NEXT: shr.u64 %rd79, %rd109, 63; +; CHECK-NEXT: shl.b64 %rd80, %rd110, 1; +; CHECK-NEXT: or.b64 %rd81, %rd80, %rd79; +; CHECK-NEXT: shl.b64 %rd82, %rd109, 1; +; CHECK-NEXT: or.b64 %rd109, %rd103, %rd82; +; CHECK-NEXT: or.b64 %rd110, %rd100, %rd81; +; CHECK-NEXT: sub.cc.s64 %rd83, %rd33, %rd78; +; CHECK-NEXT: subc.cc.s64 %rd84, %rd34, %rd75; +; CHECK-NEXT: shr.s64 %rd85, %rd84, 63; +; CHECK-NEXT: and.b64 %rd103, %rd85, 1; +; CHECK-NEXT: and.b64 %rd86, %rd85, %rd3; +; CHECK-NEXT: and.b64 %rd87, %rd85, %rd4; +; CHECK-NEXT: sub.cc.s64 %rd107, %rd78, %rd86; +; CHECK-NEXT: subc.cc.s64 %rd108, %rd75, %rd87; +; CHECK-NEXT: add.cc.s64 %rd105, %rd105, -1; +; CHECK-NEXT: addc.cc.s64 %rd106, %rd106, -1; +; CHECK-NEXT: or.b64 %rd88, %rd105, %rd106; +; CHECK-NEXT: setp.eq.b64 %p17, %rd88, 0; ; CHECK-NEXT: @%p17 bra $L__BB1_4; ; CHECK-NEXT: bra.uni $L__BB1_2; ; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit -; CHECK-NEXT: shr.u64 %rd87, %rd107, 63; -; CHECK-NEXT: shl.b64 %rd88, %rd108, 1; -; CHECK-NEXT: or.b64 %rd89, %rd88, %rd87; -; CHECK-NEXT: shl.b64 %rd90, %rd107, 1; -; CHECK-NEXT: or.b64 %rd109, %rd101, %rd90; -; CHECK-NEXT: or.b64 %rd110, %rd98, %rd89; +; CHECK-NEXT: shr.u64 %rd89, %rd109, 63; +; CHECK-NEXT: shl.b64 %rd90, %rd110, 1; +; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; +; CHECK-NEXT: shl.b64 %rd92, %rd109, 1; +; CHECK-NEXT: or.b64 %rd111, %rd103, %rd92; +; CHECK-NEXT: or.b64 %rd112, %rd100, %rd91; ; CHECK-NEXT: $L__BB1_5: // %udiv-end -; CHECK-NEXT: mul.hi.u64 %rd91, %rd3, %rd109; -; CHECK-NEXT: mad.lo.s64 %rd92, %rd3, %rd110, %rd91; -; CHECK-NEXT: mad.lo.s64 %rd93, %rd4, %rd109, %rd92; -; CHECK-NEXT: mul.lo.s64 %rd94, %rd3, %rd109; -; CHECK-NEXT: sub.cc.s64 %rd95, %rd41, %rd94; -; CHECK-NEXT: subc.cc.s64 %rd96, %rd42, %rd93; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd95, %rd96}; +; CHECK-NEXT: mul.hi.u64 %rd93, %rd3, %rd111; +; CHECK-NEXT: mad.lo.s64 %rd94, %rd3, %rd112, %rd93; +; CHECK-NEXT: mad.lo.s64 %rd95, %rd4, %rd111, %rd94; +; CHECK-NEXT: mul.lo.s64 %rd96, %rd3, %rd111; +; CHECK-NEXT: sub.cc.s64 %rd97, %rd41, %rd96; +; CHECK-NEXT: subc.cc.s64 %rd98, %rd42, %rd95; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd97, %rd98}; ; CHECK-NEXT: ret; %div = urem i128 %lhs, %rhs ret i128 %div @@ -307,9 +306,9 @@ define i128 @urem_i128_pow2k(i128 %lhs) { define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-LABEL: sdiv_i128( ; CHECK: { -; CHECK-NEXT: .reg .pred %p<22>; +; CHECK-NEXT: .reg .pred %p<20>; ; CHECK-NEXT: .reg .b32 %r<12>; -; CHECK-NEXT: .reg .b64 %rd<121>; +; CHECK-NEXT: .reg .b64 %rd<122>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases ; CHECK-NEXT: ld.param.v2.b64 {%rd45, %rd46}, [sdiv_i128_param_0]; @@ -345,97 +344,96 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: cvt.u64.u32 %rd63, %r4; ; CHECK-NEXT: add.s64 %rd64, %rd63, 64; ; CHECK-NEXT: selp.b64 %rd65, %rd62, %rd64, %p7; -; CHECK-NEXT: mov.b64 %rd111, 0; +; CHECK-NEXT: mov.b64 %rd112, 0; ; CHECK-NEXT: sub.cc.s64 %rd67, %rd61, %rd65; -; CHECK-NEXT: subc.cc.s64 %rd8, %rd111, 0; -; CHECK-NEXT: setp.ne.b64 %p8, %rd8, 0; -; CHECK-NEXT: and.pred %p10, %p8, %p8; -; CHECK-NEXT: setp.eq.b64 %p11, %rd8, 0; -; CHECK-NEXT: setp.gt.u64 %p12, %rd67, 127; -; CHECK-NEXT: and.pred %p13, %p11, %p12; -; CHECK-NEXT: or.pred %p14, %p13, %p10; -; CHECK-NEXT: or.pred %p15, %p5, %p14; -; CHECK-NEXT: xor.b64 %rd68, %rd67, 127; -; CHECK-NEXT: or.b64 %rd69, %rd68, %rd8; -; CHECK-NEXT: setp.eq.b64 %p16, %rd69, 0; -; CHECK-NEXT: selp.b64 %rd120, 0, %rd2, %p15; -; CHECK-NEXT: selp.b64 %rd119, 0, %rd1, %p15; -; CHECK-NEXT: or.pred %p17, %p15, %p16; -; CHECK-NEXT: @%p17 bra $L__BB4_5; +; CHECK-NEXT: subc.cc.s64 %rd68, %rd112, 0; +; CHECK-NEXT: setp.gt.u64 %p8, %rd67, 127; +; CHECK-NEXT: setp.eq.b64 %p9, %rd68, 0; +; CHECK-NEXT: and.pred %p10, %p9, %p8; +; CHECK-NEXT: setp.ne.b64 %p11, %rd68, 0; +; CHECK-NEXT: or.pred %p12, %p10, %p11; +; CHECK-NEXT: or.pred %p13, %p5, %p12; +; CHECK-NEXT: xor.b64 %rd69, %rd67, 127; +; CHECK-NEXT: or.b64 %rd70, %rd69, %rd68; +; CHECK-NEXT: setp.eq.b64 %p14, %rd70, 0; +; CHECK-NEXT: selp.b64 %rd121, 0, %rd2, %p13; +; CHECK-NEXT: selp.b64 %rd120, 0, %rd1, %p13; +; CHECK-NEXT: or.pred %p15, %p13, %p14; +; CHECK-NEXT: @%p15 bra $L__BB4_5; ; CHECK-NEXT: // %bb.3: // %udiv-bb1 -; CHECK-NEXT: add.cc.s64 %rd113, %rd67, 1; -; CHECK-NEXT: addc.cc.s64 %rd114, %rd8, 0; -; CHECK-NEXT: or.b64 %rd72, %rd113, %rd114; -; CHECK-NEXT: setp.eq.b64 %p18, %rd72, 0; +; CHECK-NEXT: add.cc.s64 %rd114, %rd67, 1; +; CHECK-NEXT: addc.cc.s64 %rd115, %rd68, 0; +; CHECK-NEXT: or.b64 %rd73, %rd114, %rd115; +; CHECK-NEXT: setp.eq.b64 %p16, %rd73, 0; ; CHECK-NEXT: cvt.u32.u64 %r5, %rd67; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; -; CHECK-NEXT: shl.b64 %rd73, %rd2, %r6; +; CHECK-NEXT: shl.b64 %rd74, %rd2, %r6; ; CHECK-NEXT: sub.s32 %r7, 64, %r6; -; CHECK-NEXT: shr.u64 %rd74, %rd1, %r7; -; CHECK-NEXT: or.b64 %rd75, %rd73, %rd74; +; CHECK-NEXT: shr.u64 %rd75, %rd1, %r7; +; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75; ; CHECK-NEXT: sub.s32 %r8, 63, %r5; -; CHECK-NEXT: shl.b64 %rd76, %rd1, %r8; -; CHECK-NEXT: setp.gt.s32 %p19, %r6, 63; -; CHECK-NEXT: selp.b64 %rd118, %rd76, %rd75, %p19; -; CHECK-NEXT: shl.b64 %rd117, %rd1, %r6; -; CHECK-NEXT: mov.b64 %rd108, %rd111; -; CHECK-NEXT: @%p18 bra $L__BB4_4; +; CHECK-NEXT: shl.b64 %rd77, %rd1, %r8; +; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63; +; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p17; +; CHECK-NEXT: shl.b64 %rd118, %rd1, %r6; +; CHECK-NEXT: mov.b64 %rd109, %rd112; +; CHECK-NEXT: @%p16 bra $L__BB4_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader -; CHECK-NEXT: cvt.u32.u64 %r9, %rd113; -; CHECK-NEXT: shr.u64 %rd79, %rd1, %r9; +; CHECK-NEXT: cvt.u32.u64 %r9, %rd114; +; CHECK-NEXT: shr.u64 %rd80, %rd1, %r9; ; CHECK-NEXT: sub.s32 %r10, 64, %r9; -; CHECK-NEXT: shl.b64 %rd80, %rd2, %r10; -; CHECK-NEXT: or.b64 %rd81, %rd79, %rd80; +; CHECK-NEXT: shl.b64 %rd81, %rd2, %r10; +; CHECK-NEXT: or.b64 %rd82, %rd80, %rd81; ; CHECK-NEXT: add.s32 %r11, %r9, -64; -; CHECK-NEXT: shr.u64 %rd82, %rd2, %r11; -; CHECK-NEXT: setp.gt.s32 %p20, %r9, 63; -; CHECK-NEXT: selp.b64 %rd115, %rd82, %rd81, %p20; -; CHECK-NEXT: shr.u64 %rd116, %rd2, %r9; +; CHECK-NEXT: shr.u64 %rd83, %rd2, %r11; +; CHECK-NEXT: setp.gt.s32 %p18, %r9, 63; +; CHECK-NEXT: selp.b64 %rd116, %rd83, %rd82, %p18; +; CHECK-NEXT: shr.u64 %rd117, %rd2, %r9; ; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1; ; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1; -; CHECK-NEXT: mov.b64 %rd108, 0; -; CHECK-NEXT: mov.b64 %rd111, %rd108; +; CHECK-NEXT: mov.b64 %rd109, 0; +; CHECK-NEXT: mov.b64 %rd112, %rd109; ; CHECK-NEXT: $L__BB4_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: shr.u64 %rd83, %rd115, 63; -; CHECK-NEXT: shl.b64 %rd84, %rd116, 1; -; CHECK-NEXT: or.b64 %rd85, %rd84, %rd83; -; CHECK-NEXT: shl.b64 %rd86, %rd115, 1; -; CHECK-NEXT: shr.u64 %rd87, %rd118, 63; -; CHECK-NEXT: or.b64 %rd88, %rd86, %rd87; -; CHECK-NEXT: shr.u64 %rd89, %rd117, 63; -; CHECK-NEXT: shl.b64 %rd90, %rd118, 1; -; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; -; CHECK-NEXT: shl.b64 %rd92, %rd117, 1; -; CHECK-NEXT: or.b64 %rd117, %rd111, %rd92; -; CHECK-NEXT: or.b64 %rd118, %rd108, %rd91; -; CHECK-NEXT: sub.cc.s64 %rd93, %rd35, %rd88; -; CHECK-NEXT: subc.cc.s64 %rd94, %rd36, %rd85; -; CHECK-NEXT: shr.s64 %rd95, %rd94, 63; -; CHECK-NEXT: and.b64 %rd111, %rd95, 1; -; CHECK-NEXT: and.b64 %rd96, %rd95, %rd3; -; CHECK-NEXT: and.b64 %rd97, %rd95, %rd4; -; CHECK-NEXT: sub.cc.s64 %rd115, %rd88, %rd96; -; CHECK-NEXT: subc.cc.s64 %rd116, %rd85, %rd97; -; CHECK-NEXT: add.cc.s64 %rd113, %rd113, -1; -; CHECK-NEXT: addc.cc.s64 %rd114, %rd114, -1; -; CHECK-NEXT: or.b64 %rd98, %rd113, %rd114; -; CHECK-NEXT: setp.eq.b64 %p21, %rd98, 0; -; CHECK-NEXT: @%p21 bra $L__BB4_4; +; CHECK-NEXT: shr.u64 %rd84, %rd116, 63; +; CHECK-NEXT: shl.b64 %rd85, %rd117, 1; +; CHECK-NEXT: or.b64 %rd86, %rd85, %rd84; +; CHECK-NEXT: shl.b64 %rd87, %rd116, 1; +; CHECK-NEXT: shr.u64 %rd88, %rd119, 63; +; CHECK-NEXT: or.b64 %rd89, %rd87, %rd88; +; CHECK-NEXT: shr.u64 %rd90, %rd118, 63; +; CHECK-NEXT: shl.b64 %rd91, %rd119, 1; +; CHECK-NEXT: or.b64 %rd92, %rd91, %rd90; +; CHECK-NEXT: shl.b64 %rd93, %rd118, 1; +; CHECK-NEXT: or.b64 %rd118, %rd112, %rd93; +; CHECK-NEXT: or.b64 %rd119, %rd109, %rd92; +; CHECK-NEXT: sub.cc.s64 %rd94, %rd35, %rd89; +; CHECK-NEXT: subc.cc.s64 %rd95, %rd36, %rd86; +; CHECK-NEXT: shr.s64 %rd96, %rd95, 63; +; CHECK-NEXT: and.b64 %rd112, %rd96, 1; +; CHECK-NEXT: and.b64 %rd97, %rd96, %rd3; +; CHECK-NEXT: and.b64 %rd98, %rd96, %rd4; +; CHECK-NEXT: sub.cc.s64 %rd116, %rd89, %rd97; +; CHECK-NEXT: subc.cc.s64 %rd117, %rd86, %rd98; +; CHECK-NEXT: add.cc.s64 %rd114, %rd114, -1; +; CHECK-NEXT: addc.cc.s64 %rd115, %rd115, -1; +; CHECK-NEXT: or.b64 %rd99, %rd114, %rd115; +; CHECK-NEXT: setp.eq.b64 %p19, %rd99, 0; +; CHECK-NEXT: @%p19 bra $L__BB4_4; ; CHECK-NEXT: bra.uni $L__BB4_2; ; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit -; CHECK-NEXT: shr.u64 %rd99, %rd117, 63; -; CHECK-NEXT: shl.b64 %rd100, %rd118, 1; -; CHECK-NEXT: or.b64 %rd101, %rd100, %rd99; -; CHECK-NEXT: shl.b64 %rd102, %rd117, 1; -; CHECK-NEXT: or.b64 %rd119, %rd111, %rd102; -; CHECK-NEXT: or.b64 %rd120, %rd108, %rd101; +; CHECK-NEXT: shr.u64 %rd100, %rd118, 63; +; CHECK-NEXT: shl.b64 %rd101, %rd119, 1; +; CHECK-NEXT: or.b64 %rd102, %rd101, %rd100; +; CHECK-NEXT: shl.b64 %rd103, %rd118, 1; +; CHECK-NEXT: or.b64 %rd120, %rd112, %rd103; +; CHECK-NEXT: or.b64 %rd121, %rd109, %rd102; ; CHECK-NEXT: $L__BB4_5: // %udiv-end -; CHECK-NEXT: xor.b64 %rd103, %rd119, %rd5; ; CHECK-NEXT: xor.b64 %rd104, %rd120, %rd5; -; CHECK-NEXT: sub.cc.s64 %rd105, %rd103, %rd5; -; CHECK-NEXT: subc.cc.s64 %rd106, %rd104, %rd5; -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd105, %rd106}; +; CHECK-NEXT: xor.b64 %rd105, %rd121, %rd5; +; CHECK-NEXT: sub.cc.s64 %rd106, %rd104, %rd5; +; CHECK-NEXT: subc.cc.s64 %rd107, %rd105, %rd5; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd106, %rd107}; ; CHECK-NEXT: ret; %div = sdiv i128 %lhs, %rhs ret i128 %div @@ -446,7 +444,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK: { ; CHECK-NEXT: .reg .pred %p<18>; ; CHECK-NEXT: .reg .b32 %r<12>; -; CHECK-NEXT: .reg .b64 %rd<105>; +; CHECK-NEXT: .reg .b64 %rd<107>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %_udiv-special-cases ; CHECK-NEXT: ld.param.v2.b64 {%rd41, %rd42}, [udiv_i128_param_0]; @@ -470,92 +468,92 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) { ; CHECK-NEXT: cvt.u64.u32 %rd52, %r4; ; CHECK-NEXT: add.s64 %rd53, %rd52, 64; ; CHECK-NEXT: selp.b64 %rd54, %rd51, %rd53, %p5; -; CHECK-NEXT: mov.b64 %rd95, 0; -; CHECK-NEXT: sub.cc.s64 %rd5, %rd50, %rd54; -; CHECK-NEXT: subc.cc.s64 %rd6, %rd95, 0; -; CHECK-NEXT: setp.gt.u64 %p6, %rd5, 127; -; CHECK-NEXT: setp.eq.b64 %p7, %rd6, 0; +; CHECK-NEXT: mov.b64 %rd97, 0; +; CHECK-NEXT: sub.cc.s64 %rd56, %rd50, %rd54; +; CHECK-NEXT: subc.cc.s64 %rd57, %rd97, 0; +; CHECK-NEXT: setp.gt.u64 %p6, %rd56, 127; +; CHECK-NEXT: setp.eq.b64 %p7, %rd57, 0; ; CHECK-NEXT: and.pred %p8, %p7, %p6; -; CHECK-NEXT: setp.ne.b64 %p9, %rd6, 0; +; CHECK-NEXT: setp.ne.b64 %p9, %rd57, 0; ; CHECK-NEXT: or.pred %p10, %p8, %p9; ; CHECK-NEXT: or.pred %p11, %p3, %p10; -; CHECK-NEXT: xor.b64 %rd56, %rd5, 127; -; CHECK-NEXT: or.b64 %rd57, %rd56, %rd6; -; CHECK-NEXT: setp.eq.b64 %p12, %rd57, 0; -; CHECK-NEXT: selp.b64 %rd104, 0, %rd42, %p11; -; CHECK-NEXT: selp.b64 %rd103, 0, %rd41, %p11; +; CHECK-NEXT: xor.b64 %rd58, %rd56, 127; +; CHECK-NEXT: or.b64 %rd59, %rd58, %rd57; +; CHECK-NEXT: setp.eq.b64 %p12, %rd59, 0; +; CHECK-NEXT: selp.b64 %rd106, 0, %rd42, %p11; +; CHECK-NEXT: selp.b64 %rd105, 0, %rd41, %p11; ; CHECK-NEXT: or.pred %p13, %p11, %p12; ; CHECK-NEXT: @%p13 bra $L__BB5_5; ; CHECK-NEXT: // %bb.3: // %udiv-bb1 -; CHECK-NEXT: add.cc.s64 %rd97, %rd5, 1; -; CHECK-NEXT: addc.cc.s64 %rd98, %rd6, 0; -; CHECK-NEXT: or.b64 %rd60, %rd97, %rd98; -; CHECK-NEXT: setp.eq.b64 %p14, %rd60, 0; -; CHECK-NEXT: cvt.u32.u64 %r5, %rd5; +; CHECK-NEXT: add.cc.s64 %rd99, %rd56, 1; +; CHECK-NEXT: addc.cc.s64 %rd100, %rd57, 0; +; CHECK-NEXT: or.b64 %rd62, %rd99, %rd100; +; CHECK-NEXT: setp.eq.b64 %p14, %rd62, 0; +; CHECK-NEXT: cvt.u32.u64 %r5, %rd56; ; CHECK-NEXT: sub.s32 %r6, 127, %r5; -; CHECK-NEXT: shl.b64 %rd61, %rd42, %r6; +; CHECK-NEXT: shl.b64 %rd63, %rd42, %r6; ; CHECK-NEXT: sub.s32 %r7, 64, %r6; -; CHECK-NEXT: shr.u64 %rd62, %rd41, %r7; -; CHECK-NEXT: or.b64 %rd63, %rd61, %rd62; +; CHECK-NEXT: shr.u64 %rd64, %rd41, %r7; +; CHECK-NEXT: or.b64 %rd65, %rd63, %rd64; ; CHECK-NEXT: sub.s32 %r8, 63, %r5; -; CHECK-NEXT: shl.b64 %rd64, %rd41, %r8; +; CHECK-NEXT: shl.b64 %rd66, %rd41, %r8; ; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63; -; CHECK-NEXT: selp.b64 %rd102, %rd64, %rd63, %p15; -; CHECK-NEXT: shl.b64 %rd101, %rd41, %r6; -; CHECK-NEXT: mov.b64 %rd92, %rd95; +; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p15; +; CHECK-NEXT: shl.b64 %rd103, %rd41, %r6; +; CHECK-NEXT: mov.b64 %rd94, %rd97; ; CHECK-NEXT: @%p14 bra $L__BB5_4; ; CHECK-NEXT: // %bb.1: // %udiv-preheader -; CHECK-NEXT: cvt.u32.u64 %r9, %rd97; -; CHECK-NEXT: shr.u64 %rd67, %rd41, %r9; +; CHECK-NEXT: cvt.u32.u64 %r9, %rd99; +; CHECK-NEXT: shr.u64 %rd69, %rd41, %r9; ; CHECK-NEXT: sub.s32 %r10, 64, %r9; -; CHECK-NEXT: shl.b64 %rd68, %rd42, %r10; -; CHECK-NEXT: or.b64 %rd69, %rd67, %rd68; +; CHECK-NEXT: shl.b64 %rd70, %rd42, %r10; +; CHECK-NEXT: or.b64 %rd71, %rd69, %rd70; ; CHECK-NEXT: add.s32 %r11, %r9, -64; -; CHECK-NEXT: shr.u64 %rd70, %rd42, %r11; +; CHECK-NEXT: shr.u64 %rd72, %rd42, %r11; ; CHECK-NEXT: setp.gt.s32 %p16, %r9, 63; -; CHECK-NEXT: selp.b64 %rd99, %rd70, %rd69, %p16; -; CHECK-NEXT: shr.u64 %rd100, %rd42, %r9; +; CHECK-NEXT: selp.b64 %rd101, %rd72, %rd71, %p16; +; CHECK-NEXT: shr.u64 %rd102, %rd42, %r9; ; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1; ; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1; -; CHECK-NEXT: mov.b64 %rd92, 0; -; CHECK-NEXT: mov.b64 %rd95, %rd92; +; CHECK-NEXT: mov.b64 %rd94, 0; +; CHECK-NEXT: mov.b64 %rd97, %rd94; ; CHECK-NEXT: $L__BB5_2: // %udiv-do-while ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: shr.u64 %rd71, %rd99, 63; -; CHECK-NEXT: shl.b64 %rd72, %rd100, 1; -; CHECK-NEXT: or.b64 %rd73, %rd72, %rd71; -; CHECK-NEXT: shl.b64 %rd74, %rd99, 1; -; CHECK-NEXT: shr.u64 %rd75, %rd102, 63; -; CHECK-NEXT: or.b64 %rd76, %rd74, %rd75; -; CHECK-NEXT: shr.u64 %rd77, %rd101, 63; -; CHECK-NEXT: shl.b64 %rd78, %rd102, 1; -; CHECK-NEXT: or.b64 %rd79, %rd78, %rd77; -; CHECK-NEXT: shl.b64 %rd80, %rd101, 1; -; CHECK-NEXT: or.b64 %rd101, %rd95, %rd80; -; CHECK-NEXT: or.b64 %rd102, %rd92, %rd79; -; CHECK-NEXT: sub.cc.s64 %rd81, %rd33, %rd76; -; CHECK-NEXT: subc.cc.s64 %rd82, %rd34, %rd73; -; CHECK-NEXT: shr.s64 %rd83, %rd82, 63; -; CHECK-NEXT: and.b64 %rd95, %rd83, 1; -; CHECK-NEXT: and.b64 %rd84, %rd83, %rd43; -; CHECK-NEXT: and.b64 %rd85, %rd83, %rd44; -; CHECK-NEXT: sub.cc.s64 %rd99, %rd76, %rd84; -; CHECK-NEXT: subc.cc.s64 %rd100, %rd73, %rd85; -; CHECK-NEXT: add.cc.s64 %rd97, %rd97, -1; -; CHECK-NEXT: addc.cc.s64 %rd98, %rd98, -1; -; CHECK-NEXT: or.b64 %rd86, %rd97, %rd98; -; CHECK-NEXT: setp.eq.b64 %p17, %rd86, 0; +; CHECK-NEXT: shr.u64 %rd73, %rd101, 63; +; CHECK-NEXT: shl.b64 %rd74, %rd102, 1; +; CHECK-NEXT: or.b64 %rd75, %rd74, %rd73; +; CHECK-NEXT: shl.b64 %rd76, %rd101, 1; +; CHECK-NEXT: shr.u64 %rd77, %rd104, 63; +; CHECK-NEXT: or.b64 %rd78, %rd76, %rd77; +; CHECK-NEXT: shr.u64 %rd79, %rd103, 63; +; CHECK-NEXT: shl.b64 %rd80, %rd104, 1; +; CHECK-NEXT: or.b64 %rd81, %rd80, %rd79; +; CHECK-NEXT: shl.b64 %rd82, %rd103, 1; +; CHECK-NEXT: or.b64 %rd103, %rd97, %rd82; +; CHECK-NEXT: or.b64 %rd104, %rd94, %rd81; +; CHECK-NEXT: sub.cc.s64 %rd83, %rd33, %rd78; +; CHECK-NEXT: subc.cc.s64 %rd84, %rd34, %rd75; +; CHECK-NEXT: shr.s64 %rd85, %rd84, 63; +; CHECK-NEXT: and.b64 %rd97, %rd85, 1; +; CHECK-NEXT: and.b64 %rd86, %rd85, %rd43; +; CHECK-NEXT: and.b64 %rd87, %rd85, %rd44; +; CHECK-NEXT: sub.cc.s64 %rd101, %rd78, %rd86; +; CHECK-NEXT: subc.cc.s64 %rd102, %rd75, %rd87; +; CHECK-NEXT: add.cc.s64 %rd99, %rd99, -1; +; CHECK-NEXT: addc.cc.s64 %rd100, %rd100, -1; +; CHECK-NEXT: or.b64 %rd88, %rd99, %rd100; +; CHECK-NEXT: setp.eq.b64 %p17, %rd88, 0; ; CHECK-NEXT: @%p17 bra $L__BB5_4; ; CHECK-NEXT: bra.uni $L__BB5_2; ; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit -; CHECK-NEXT: shr.u64 %rd87, %rd101, 63; -; CHECK-NEXT: shl.b64 %rd88, %rd102, 1; -; CHECK-NEXT: or.b64 %rd89, %rd88, %rd87; -; CHECK-NEXT: shl.b64 %rd90, %rd101, 1; -; CHECK-NEXT: or.b64 %rd103, %rd95, %rd90; -; CHECK-NEXT: or.b64 %rd104, %rd92, %rd89; +; CHECK-NEXT: shr.u64 %rd89, %rd103, 63; +; CHECK-NEXT: shl.b64 %rd90, %rd104, 1; +; CHECK-NEXT: or.b64 %rd91, %rd90, %rd89; +; CHECK-NEXT: shl.b64 %rd92, %rd103, 1; +; CHECK-NEXT: or.b64 %rd105, %rd97, %rd92; +; CHECK-NEXT: or.b64 %rd106, %rd94, %rd91; ; CHECK-NEXT: $L__BB5_5: // %udiv-end -; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd103, %rd104}; +; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd105, %rd106}; ; CHECK-NEXT: ret; %div = udiv i128 %lhs, %rhs ret i128 %div diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll index 2b7a06c..74136bb 100644 --- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll @@ -642,10 +642,10 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: ld.param.b32 %r1, [test_call_param_0]; ; COMMON-NEXT: { // callseq 0, 0 ; COMMON-NEXT: .param .align 4 .b8 param0[4]; -; COMMON-NEXT: st.param.b32 [param0], %r1; ; COMMON-NEXT: .param .align 4 .b8 param1[4]; -; COMMON-NEXT: st.param.b32 [param1], %r2; ; COMMON-NEXT: .param .align 4 .b8 retval0[4]; +; COMMON-NEXT: st.param.b32 [param1], %r2; +; COMMON-NEXT: st.param.b32 [param0], %r1; ; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1); ; COMMON-NEXT: ld.param.b32 %r3, [retval0]; ; COMMON-NEXT: } // callseq 0 @@ -665,10 +665,10 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0]; ; COMMON-NEXT: { // callseq 1, 0 ; COMMON-NEXT: .param .align 4 .b8 param0[4]; -; COMMON-NEXT: st.param.b32 [param0], %r2; ; COMMON-NEXT: .param .align 4 .b8 param1[4]; -; COMMON-NEXT: st.param.b32 [param1], %r1; ; COMMON-NEXT: .param .align 4 .b8 retval0[4]; +; COMMON-NEXT: st.param.b32 [param1], %r1; +; COMMON-NEXT: st.param.b32 [param0], %r2; ; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1); ; COMMON-NEXT: ld.param.b32 %r3, [retval0]; ; COMMON-NEXT: } // callseq 1 @@ -688,10 +688,10 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 { ; COMMON-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0]; ; COMMON-NEXT: { // callseq 2, 0 ; COMMON-NEXT: .param .align 4 .b8 param0[4]; -; COMMON-NEXT: st.param.b32 [param0], %r2; ; COMMON-NEXT: .param .align 4 .b8 param1[4]; -; COMMON-NEXT: st.param.b32 [param1], %r1; ; COMMON-NEXT: .param .align 4 .b8 retval0[4]; +; COMMON-NEXT: st.param.b32 [param1], %r1; +; COMMON-NEXT: st.param.b32 [param0], %r2; ; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1); ; COMMON-NEXT: ld.param.b32 %r3, [retval0]; ; COMMON-NEXT: } // callseq 2 diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll index 3edd4e4..98f94bb 100644 --- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll @@ -1,42 +1,107 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \ -; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ -; RUN: | FileCheck %s -; RUN: %if ptxas %{ \ -; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \ -; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ -; RUN: | %ptxas-verify -arch=sm_90 \ +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON +; RUN: %if ptxas %{ \ +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs -O0 \ +; RUN: | %ptxas-verify -arch=sm_90 \ +; RUN: %} +; RUN: %if ptxas %{ \ +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs \ +; RUN: | %ptxas-verify -arch=sm_90 \ ; RUN: %} +target triple = "nvptx64-nvidia-cuda" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) { -; CHECK-LABEL: test_bitcast_2xi8_i16( -; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<5>; -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: shl.b16 %rs3, %rs2, 8; -; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r2, %rs4; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; -; CHECK-NEXT: ret; +; O0-LABEL: test_bitcast_2xi8_i16( +; O0: { +; O0-NEXT: .reg .b16 %rs<5>; +; O0-NEXT: .reg .b32 %r<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0]; +; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; +; O0-NEXT: shl.b16 %rs3, %rs2, 8; +; O0-NEXT: or.b16 %rs4, %rs1, %rs3; +; O0-NEXT: cvt.u32.u16 %r2, %rs4; +; O0-NEXT: st.param.b32 [func_retval0], %r2; +; O0-NEXT: ret; +; +; O3-LABEL: test_bitcast_2xi8_i16( +; O3: { +; O3-NEXT: .reg .b32 %r<2>; +; O3-EMPTY: +; O3-NEXT: // %bb.0: +; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0]; +; O3-NEXT: st.param.b32 [func_retval0], %r1; +; O3-NEXT: ret; %res = bitcast <2 x i8> %a to i16 ret i16 %res } define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) { -; CHECK-LABEL: test_bitcast_i16_2xi8( -; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; -; CHECK-NEXT: ret; +; O0-LABEL: test_bitcast_i16_2xi8( +; O0: { +; O0-NEXT: .reg .b16 %rs<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; +; O0-NEXT: st.param.b16 [func_retval0], %rs1; +; O0-NEXT: ret; +; +; O3-LABEL: test_bitcast_i16_2xi8( +; O3: { +; O3-NEXT: .reg .b16 %rs<2>; +; O3-EMPTY: +; O3-NEXT: // %bb.0: +; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; +; O3-NEXT: st.param.b16 [func_retval0], %rs1; +; O3-NEXT: ret; %res = bitcast i16 %a to <2 x i8> ret <2 x i8> %res } + +define <2 x i8> @test_call_2xi8(<2 x i8> %a) { +; O0-LABEL: test_call_2xi8( +; O0: { +; O0-NEXT: .reg .b16 %rs<7>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0]; +; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; +; O0-NEXT: { // callseq 0, 0 +; O0-NEXT: .param .align 2 .b8 param0[2]; +; O0-NEXT: .param .align 2 .b8 retval0[2]; +; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2}; +; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0); +; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0]; +; O0-NEXT: } // callseq 0 +; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4}; +; O0-NEXT: ret; +; +; O3-LABEL: test_call_2xi8( +; O3: { +; O3-NEXT: .reg .b16 %rs<7>; +; O3-EMPTY: +; O3-NEXT: // %bb.0: +; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0]; +; O3-NEXT: { // callseq 0, 0 +; O3-NEXT: .param .align 2 .b8 param0[2]; +; O3-NEXT: .param .align 2 .b8 retval0[2]; +; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2}; +; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0); +; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0]; +; O3-NEXT: } // callseq 0 +; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4}; +; O3-NEXT: ret; + %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a) + ret <2 x i8> %res +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; COMMON: {{.*}} diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll index 9891e33..06c2cc8 100644 --- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll @@ -1273,10 +1273,10 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 { ; O0-NEXT: ld.param.b32 %r1, [test_call_param_0]; ; O0-NEXT: { // callseq 0, 0 ; O0-NEXT: .param .align 4 .b8 param0[4]; -; O0-NEXT: st.param.b32 [param0], %r1; ; O0-NEXT: .param .align 4 .b8 param1[4]; -; O0-NEXT: st.param.b32 [param1], %r2; ; O0-NEXT: .param .align 4 .b8 retval0[4]; +; O0-NEXT: st.param.b32 [param1], %r2; +; O0-NEXT: st.param.b32 [param0], %r1; ; O0-NEXT: call.uni (retval0), test_callee, (param0, param1); ; O0-NEXT: ld.param.b32 %r3, [retval0]; ; O0-NEXT: } // callseq 0 @@ -1289,13 +1289,13 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 { ; O3-EMPTY: ; O3-NEXT: // %bb.0: ; O3-NEXT: ld.param.b32 %r1, [test_call_param_0]; -; O3-NEXT: ld.param.b32 %r2, [test_call_param_1]; ; O3-NEXT: { // callseq 0, 0 ; O3-NEXT: .param .align 4 .b8 param0[4]; -; O3-NEXT: st.param.b32 [param0], %r1; ; O3-NEXT: .param .align 4 .b8 param1[4]; -; O3-NEXT: st.param.b32 [param1], %r2; ; O3-NEXT: .param .align 4 .b8 retval0[4]; +; O3-NEXT: ld.param.b32 %r2, [test_call_param_1]; +; O3-NEXT: st.param.b32 [param1], %r2; +; O3-NEXT: st.param.b32 [param0], %r1; ; O3-NEXT: call.uni (retval0), test_callee, (param0, param1); ; O3-NEXT: ld.param.b32 %r3, [retval0]; ; O3-NEXT: } // callseq 0 @@ -1315,10 +1315,10 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 { ; O0-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0]; ; O0-NEXT: { // callseq 1, 0 ; O0-NEXT: .param .align 4 .b8 param0[4]; -; O0-NEXT: st.param.b32 [param0], %r2; ; O0-NEXT: .param .align 4 .b8 param1[4]; -; O0-NEXT: st.param.b32 [param1], %r1; ; O0-NEXT: .param .align 4 .b8 retval0[4]; +; O0-NEXT: st.param.b32 [param1], %r1; +; O0-NEXT: st.param.b32 [param0], %r2; ; O0-NEXT: call.uni (retval0), test_callee, (param0, param1); ; O0-NEXT: ld.param.b32 %r3, [retval0]; ; O0-NEXT: } // callseq 1 @@ -1331,13 +1331,13 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 { ; O3-EMPTY: ; O3-NEXT: // %bb.0: ; O3-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0]; -; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1]; ; O3-NEXT: { // callseq 1, 0 ; O3-NEXT: .param .align 4 .b8 param0[4]; -; O3-NEXT: st.param.b32 [param0], %r2; ; O3-NEXT: .param .align 4 .b8 param1[4]; -; O3-NEXT: st.param.b32 [param1], %r1; ; O3-NEXT: .param .align 4 .b8 retval0[4]; +; O3-NEXT: st.param.b32 [param1], %r1; +; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1]; +; O3-NEXT: st.param.b32 [param0], %r2; ; O3-NEXT: call.uni (retval0), test_callee, (param0, param1); ; O3-NEXT: ld.param.b32 %r3, [retval0]; ; O3-NEXT: } // callseq 1 @@ -1357,10 +1357,10 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 { ; O0-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0]; ; O0-NEXT: { // callseq 2, 0 ; O0-NEXT: .param .align 4 .b8 param0[4]; -; O0-NEXT: st.param.b32 [param0], %r2; ; O0-NEXT: .param .align 4 .b8 param1[4]; -; O0-NEXT: st.param.b32 [param1], %r1; ; O0-NEXT: .param .align 4 .b8 retval0[4]; +; O0-NEXT: st.param.b32 [param1], %r1; +; O0-NEXT: st.param.b32 [param0], %r2; ; O0-NEXT: call.uni (retval0), test_callee, (param0, param1); ; O0-NEXT: ld.param.b32 %r3, [retval0]; ; O0-NEXT: } // callseq 2 @@ -1373,13 +1373,13 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 { ; O3-EMPTY: ; O3-NEXT: // %bb.0: ; O3-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0]; -; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1]; ; O3-NEXT: { // callseq 2, 0 ; O3-NEXT: .param .align 4 .b8 param0[4]; -; O3-NEXT: st.param.b32 [param0], %r2; ; O3-NEXT: .param .align 4 .b8 param1[4]; -; O3-NEXT: st.param.b32 [param1], %r1; ; O3-NEXT: .param .align 4 .b8 retval0[4]; +; O3-NEXT: st.param.b32 [param1], %r1; +; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1]; +; O3-NEXT: st.param.b32 [param0], %r2; ; O3-NEXT: call.uni (retval0), test_callee, (param0, param1); ; O3-NEXT: ld.param.b32 %r3, [retval0]; ; O3-NEXT: } // callseq 2 @@ -2044,7 +2044,7 @@ define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) { ; O0-LABEL: test_srem_v4i8( ; O0: { ; O0-NEXT: .reg .b16 %rs<13>; -; O0-NEXT: .reg .b32 %r<18>; +; O0-NEXT: .reg .b32 %r<16>; ; O0-NEXT: .reg .b64 %rd<4>; ; O0-EMPTY: ; O0-NEXT: // %bb.0: // %entry @@ -2066,27 +2066,25 @@ define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) { ; O0-NEXT: rem.s16 %rs6, %rs5, %rs4; ; O0-NEXT: cvt.u32.u16 %r8, %rs6; ; O0-NEXT: prmt.b32 %r9, %r8, %r5, 0x3340U; -; O0-NEXT: prmt.b32 %r10, %r2, 0, 0x9991U; -; O0-NEXT: cvt.u16.u32 %rs7, %r10; -; O0-NEXT: prmt.b32 %r11, %r1, 0, 0x9991U; -; O0-NEXT: cvt.u16.u32 %rs8, %r11; +; O0-NEXT: cvt.s8.s32 %rs7, %r2; +; O0-NEXT: cvt.s8.s32 %rs8, %r1; ; O0-NEXT: rem.s16 %rs9, %rs8, %rs7; -; O0-NEXT: cvt.u32.u16 %r12, %rs9; -; O0-NEXT: prmt.b32 %r13, %r2, 0, 0x8880U; -; O0-NEXT: cvt.u16.u32 %rs10, %r13; -; O0-NEXT: prmt.b32 %r14, %r1, 0, 0x8880U; -; O0-NEXT: cvt.u16.u32 %rs11, %r14; +; O0-NEXT: cvt.u32.u16 %r10, %rs9; +; O0-NEXT: prmt.b32 %r11, %r2, 0, 0x9991U; +; O0-NEXT: cvt.u16.u32 %rs10, %r11; +; O0-NEXT: prmt.b32 %r12, %r1, 0, 0x9991U; +; O0-NEXT: cvt.u16.u32 %rs11, %r12; ; O0-NEXT: rem.s16 %rs12, %rs11, %rs10; -; O0-NEXT: cvt.u32.u16 %r15, %rs12; -; O0-NEXT: prmt.b32 %r16, %r15, %r12, 0x3340U; -; O0-NEXT: prmt.b32 %r17, %r16, %r9, 0x5410U; -; O0-NEXT: st.b32 [%rd3], %r17; +; O0-NEXT: cvt.u32.u16 %r13, %rs12; +; O0-NEXT: prmt.b32 %r14, %r10, %r13, 0x3340U; +; O0-NEXT: prmt.b32 %r15, %r14, %r9, 0x5410U; +; O0-NEXT: st.b32 [%rd3], %r15; ; O0-NEXT: ret; ; ; O3-LABEL: test_srem_v4i8( ; O3: { ; O3-NEXT: .reg .b16 %rs<13>; -; O3-NEXT: .reg .b32 %r<18>; +; O3-NEXT: .reg .b32 %r<16>; ; O3-NEXT: .reg .b64 %rd<4>; ; O3-EMPTY: ; O3-NEXT: // %bb.0: // %entry @@ -2108,21 +2106,19 @@ define void @test_srem_v4i8(ptr %a, ptr %b, ptr %c) { ; O3-NEXT: rem.s16 %rs6, %rs5, %rs4; ; O3-NEXT: cvt.u32.u16 %r8, %rs6; ; O3-NEXT: prmt.b32 %r9, %r8, %r5, 0x3340U; -; O3-NEXT: prmt.b32 %r10, %r2, 0, 0x9991U; -; O3-NEXT: cvt.u16.u32 %rs7, %r10; -; O3-NEXT: prmt.b32 %r11, %r1, 0, 0x9991U; -; O3-NEXT: cvt.u16.u32 %rs8, %r11; +; O3-NEXT: cvt.s8.s32 %rs7, %r2; +; O3-NEXT: cvt.s8.s32 %rs8, %r1; ; O3-NEXT: rem.s16 %rs9, %rs8, %rs7; -; O3-NEXT: cvt.u32.u16 %r12, %rs9; -; O3-NEXT: prmt.b32 %r13, %r2, 0, 0x8880U; -; O3-NEXT: cvt.u16.u32 %rs10, %r13; -; O3-NEXT: prmt.b32 %r14, %r1, 0, 0x8880U; -; O3-NEXT: cvt.u16.u32 %rs11, %r14; +; O3-NEXT: cvt.u32.u16 %r10, %rs9; +; O3-NEXT: prmt.b32 %r11, %r2, 0, 0x9991U; +; O3-NEXT: cvt.u16.u32 %rs10, %r11; +; O3-NEXT: prmt.b32 %r12, %r1, 0, 0x9991U; +; O3-NEXT: cvt.u16.u32 %rs11, %r12; ; O3-NEXT: rem.s16 %rs12, %rs11, %rs10; -; O3-NEXT: cvt.u32.u16 %r15, %rs12; -; O3-NEXT: prmt.b32 %r16, %r15, %r12, 0x3340U; -; O3-NEXT: prmt.b32 %r17, %r16, %r9, 0x5410U; -; O3-NEXT: st.b32 [%rd3], %r17; +; O3-NEXT: cvt.u32.u16 %r13, %rs12; +; O3-NEXT: prmt.b32 %r14, %r10, %r13, 0x3340U; +; O3-NEXT: prmt.b32 %r15, %r14, %r9, 0x5410U; +; O3-NEXT: st.b32 [%rd3], %r15; ; O3-NEXT: ret; entry: %t57 = load <4 x i8>, ptr %a, align 4 @@ -2142,7 +2138,7 @@ define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { ; O0-LABEL: test_srem_v3i8( ; O0: { ; O0-NEXT: .reg .b16 %rs<20>; -; O0-NEXT: .reg .b32 %r<14>; +; O0-NEXT: .reg .b32 %r<8>; ; O0-NEXT: .reg .b64 %rd<4>; ; O0-EMPTY: ; O0-NEXT: // %bb.0: // %entry @@ -2161,25 +2157,19 @@ define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { ; O0-NEXT: or.b16 %rs9, %rs8, %rs6; ; O0-NEXT: cvt.u32.u16 %r2, %rs9; ; O0-NEXT: ld.s8 %rs10, [%rd2+2]; -; O0-NEXT: prmt.b32 %r3, %r2, 0, 0x9991U; -; O0-NEXT: cvt.u16.u32 %rs11, %r3; -; O0-NEXT: prmt.b32 %r4, %r1, 0, 0x9991U; -; O0-NEXT: cvt.u16.u32 %rs12, %r4; +; O0-NEXT: cvt.s16.s8 %rs11, %rs9; +; O0-NEXT: cvt.s16.s8 %rs12, %rs4; ; O0-NEXT: rem.s16 %rs13, %rs12, %rs11; -; O0-NEXT: cvt.u32.u16 %r5, %rs13; -; O0-NEXT: prmt.b32 %r6, %r2, 0, 0x8880U; -; O0-NEXT: cvt.u16.u32 %rs14, %r6; -; O0-NEXT: prmt.b32 %r7, %r1, 0, 0x8880U; -; O0-NEXT: cvt.u16.u32 %rs15, %r7; +; O0-NEXT: cvt.u32.u16 %r3, %rs13; +; O0-NEXT: prmt.b32 %r4, %r2, 0, 0x9991U; +; O0-NEXT: cvt.u16.u32 %rs14, %r4; +; O0-NEXT: prmt.b32 %r5, %r1, 0, 0x9991U; +; O0-NEXT: cvt.u16.u32 %rs15, %r5; ; O0-NEXT: rem.s16 %rs16, %rs15, %rs14; -; O0-NEXT: cvt.u32.u16 %r8, %rs16; -; O0-NEXT: prmt.b32 %r9, %r8, %r5, 0x3340U; -; O0-NEXT: // implicit-def: %r11 -; O0-NEXT: // implicit-def: %r12 -; O0-NEXT: prmt.b32 %r10, %r11, %r12, 0x3340U; -; O0-NEXT: prmt.b32 %r13, %r9, %r10, 0x5410U; +; O0-NEXT: cvt.u32.u16 %r6, %rs16; +; O0-NEXT: prmt.b32 %r7, %r3, %r6, 0x3340U; ; O0-NEXT: rem.s16 %rs17, %rs5, %rs10; -; O0-NEXT: cvt.u16.u32 %rs18, %r13; +; O0-NEXT: cvt.u16.u32 %rs18, %r7; ; O0-NEXT: st.b8 [%rd3], %rs18; ; O0-NEXT: shr.u16 %rs19, %rs18, 8; ; O0-NEXT: st.b8 [%rd3+1], %rs19; @@ -2189,7 +2179,7 @@ define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { ; O3-LABEL: test_srem_v3i8( ; O3: { ; O3-NEXT: .reg .b16 %rs<20>; -; O3-NEXT: .reg .b32 %r<14>; +; O3-NEXT: .reg .b32 %r<8>; ; O3-NEXT: .reg .b64 %rd<4>; ; O3-EMPTY: ; O3-NEXT: // %bb.0: // %entry @@ -2208,24 +2198,20 @@ define void @test_srem_v3i8(ptr %a, ptr %b, ptr %c) { ; O3-NEXT: cvt.u32.u16 %r2, %rs9; ; O3-NEXT: ld.s8 %rs10, [%rd2+2]; ; O3-NEXT: ld.param.b64 %rd3, [test_srem_v3i8_param_2]; -; O3-NEXT: prmt.b32 %r3, %r2, 0, 0x9991U; -; O3-NEXT: cvt.u16.u32 %rs11, %r3; -; O3-NEXT: prmt.b32 %r4, %r1, 0, 0x9991U; -; O3-NEXT: cvt.u16.u32 %rs12, %r4; +; O3-NEXT: cvt.s16.s8 %rs11, %rs9; +; O3-NEXT: cvt.s16.s8 %rs12, %rs4; ; O3-NEXT: rem.s16 %rs13, %rs12, %rs11; -; O3-NEXT: cvt.u32.u16 %r5, %rs13; -; O3-NEXT: prmt.b32 %r6, %r2, 0, 0x8880U; -; O3-NEXT: cvt.u16.u32 %rs14, %r6; -; O3-NEXT: prmt.b32 %r7, %r1, 0, 0x8880U; -; O3-NEXT: cvt.u16.u32 %rs15, %r7; +; O3-NEXT: cvt.u32.u16 %r3, %rs13; +; O3-NEXT: prmt.b32 %r4, %r2, 0, 0x9991U; +; O3-NEXT: cvt.u16.u32 %rs14, %r4; +; O3-NEXT: prmt.b32 %r5, %r1, 0, 0x9991U; +; O3-NEXT: cvt.u16.u32 %rs15, %r5; ; O3-NEXT: rem.s16 %rs16, %rs15, %rs14; -; O3-NEXT: cvt.u32.u16 %r8, %rs16; -; O3-NEXT: prmt.b32 %r9, %r8, %r5, 0x3340U; -; O3-NEXT: prmt.b32 %r10, %r11, %r12, 0x3340U; -; O3-NEXT: prmt.b32 %r13, %r9, %r10, 0x5410U; +; O3-NEXT: cvt.u32.u16 %r6, %rs16; +; O3-NEXT: prmt.b32 %r7, %r3, %r6, 0x3340U; ; O3-NEXT: rem.s16 %rs17, %rs5, %rs10; ; O3-NEXT: st.b8 [%rd3+2], %rs17; -; O3-NEXT: cvt.u16.u32 %rs18, %r13; +; O3-NEXT: cvt.u16.u32 %rs18, %r7; ; O3-NEXT: st.b8 [%rd3], %rs18; ; O3-NEXT: shr.u16 %rs19, %rs18, 8; ; O3-NEXT: st.b8 [%rd3+1], %rs19; @@ -2340,23 +2326,22 @@ define <4 x float> @test_sitofp_v4i8(<4 x i8> %a) { ; CHECK-LABEL: test_sitofp_v4i8( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<5>; -; CHECK-NEXT: .reg .b32 %r<10>; +; CHECK-NEXT: .reg .b32 %r<9>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_sitofp_v4i8_param_0]; -; CHECK-NEXT: prmt.b32 %r2, %r1, 0, 0xbbb3U; -; CHECK-NEXT: cvt.u16.u32 %rs1, %r2; -; CHECK-NEXT: cvt.rn.f32.s16 %r3, %rs1; -; CHECK-NEXT: prmt.b32 %r4, %r1, 0, 0xaaa2U; -; CHECK-NEXT: cvt.u16.u32 %rs2, %r4; -; CHECK-NEXT: cvt.rn.f32.s16 %r5, %rs2; -; CHECK-NEXT: prmt.b32 %r6, %r1, 0, 0x9991U; -; CHECK-NEXT: cvt.u16.u32 %rs3, %r6; -; CHECK-NEXT: cvt.rn.f32.s16 %r7, %rs3; -; CHECK-NEXT: prmt.b32 %r8, %r1, 0, 0x8880U; -; CHECK-NEXT: cvt.u16.u32 %rs4, %r8; -; CHECK-NEXT: cvt.rn.f32.s16 %r9, %rs4; -; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r9, %r7, %r5, %r3}; +; CHECK-NEXT: cvt.s8.s32 %rs1, %r1; +; CHECK-NEXT: cvt.rn.f32.s16 %r2, %rs1; +; CHECK-NEXT: prmt.b32 %r3, %r1, 0, 0xbbb3U; +; CHECK-NEXT: cvt.u16.u32 %rs2, %r3; +; CHECK-NEXT: cvt.rn.f32.s16 %r4, %rs2; +; CHECK-NEXT: prmt.b32 %r5, %r1, 0, 0xaaa2U; +; CHECK-NEXT: cvt.u16.u32 %rs3, %r5; +; CHECK-NEXT: cvt.rn.f32.s16 %r6, %rs3; +; CHECK-NEXT: prmt.b32 %r7, %r1, 0, 0x9991U; +; CHECK-NEXT: cvt.u16.u32 %rs4, %r7; +; CHECK-NEXT: cvt.rn.f32.s16 %r8, %rs4; +; CHECK-NEXT: st.param.v4.b32 [func_retval0], {%r2, %r8, %r6, %r4}; ; CHECK-NEXT: ret; %r = sitofp <4 x i8> %a to <4 x float> ret <4 x float> %r diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll index be84f9b..a3bf892 100644 --- a/llvm/test/CodeGen/NVPTX/idioms.ll +++ b/llvm/test/CodeGen/NVPTX/idioms.ll @@ -173,8 +173,8 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){ ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: shr.s32 %r2, %r1, 16; ; CHECK-NEXT: shr.u32 %r3, %r2, 16; -; CHECK-NEXT: st.param.b16 [func_retval0], %r2; ; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3; +; CHECK-NEXT: st.param.b16 [func_retval0], %r2; ; CHECK-NEXT: ret; call void @escape_int(i32 %i); // Force %i to be loaded completely. %i1 = ashr i32 %i, 16 diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll index eae0321..782e672 100644 --- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll +++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll @@ -23,15 +23,15 @@ define internal i32 @foo() { ; CHECK-NEXT: mov.b64 %SPL, __local_depot0; ; CHECK-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-NEXT: ld.global.b64 %rd1, [ptr]; -; CHECK-NEXT: add.u64 %rd3, %SPL, 1; -; CHECK-NEXT: ld.local.b8 %rs1, [%rd3]; -; CHECK-NEXT: add.u64 %rd4, %SP, 0; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 1 .b8 param0[1]; -; CHECK-NEXT: st.param.b8 [param0], %rs1; ; CHECK-NEXT: .param .b64 param1; -; CHECK-NEXT: st.param.b64 [param1], %rd4; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: add.u64 %rd2, %SP, 0; +; CHECK-NEXT: st.param.b64 [param1], %rd2; +; CHECK-NEXT: add.u64 %rd4, %SPL, 1; +; CHECK-NEXT: ld.local.b8 %rs1, [%rd4]; +; CHECK-NEXT: st.param.b8 [param0], %rs1; ; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _); ; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0; ; CHECK-NEXT: ld.param.b32 %r1, [retval0]; @@ -60,15 +60,15 @@ define internal i32 @bar() { ; CHECK-NEXT: mov.b64 %SPL, __local_depot1; ; CHECK-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-NEXT: ld.global.b64 %rd1, [ptr]; -; CHECK-NEXT: add.u64 %rd3, %SPL, 8; -; CHECK-NEXT: ld.local.b64 %rd4, [%rd3]; -; CHECK-NEXT: add.u64 %rd5, %SP, 0; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.b64 [param0], %rd4; ; CHECK-NEXT: .param .b64 param1; -; CHECK-NEXT: st.param.b64 [param1], %rd5; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: add.u64 %rd2, %SP, 0; +; CHECK-NEXT: st.param.b64 [param1], %rd2; +; CHECK-NEXT: add.u64 %rd4, %SPL, 8; +; CHECK-NEXT: ld.local.b64 %rd5, [%rd4]; +; CHECK-NEXT: st.param.b64 [param0], %rd5; ; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _); ; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1; ; CHECK-NEXT: ld.param.b32 %r1, [retval0]; diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll index 5c30173..ae069cf 100644 --- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll +++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll @@ -114,15 +114,14 @@ define void @foo3(i32 %a) { ; PTX64-NEXT: .reg .b64 %SP; ; PTX64-NEXT: .reg .b64 %SPL; ; PTX64-NEXT: .reg .b32 %r<2>; -; PTX64-NEXT: .reg .b64 %rd<5>; +; PTX64-NEXT: .reg .b64 %rd<4>; ; PTX64-EMPTY: ; PTX64-NEXT: // %bb.0: ; PTX64-NEXT: mov.b64 %SPL, __local_depot2; ; PTX64-NEXT: ld.param.b32 %r1, [foo3_param_0]; ; PTX64-NEXT: add.u64 %rd2, %SPL, 0; -; PTX64-NEXT: mul.wide.s32 %rd3, %r1, 4; -; PTX64-NEXT: add.s64 %rd4, %rd2, %rd3; -; PTX64-NEXT: st.local.b32 [%rd4], %r1; +; PTX64-NEXT: mad.wide.s32 %rd3, %r1, 4, %rd2; +; PTX64-NEXT: st.local.b32 [%rd3], %r1; ; PTX64-NEXT: ret; %local = alloca [3 x i32], align 4 %1 = getelementptr inbounds i32, ptr %local, i32 %a diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 321a624..38185c7b 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -121,20 +121,18 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) { ; PTX-LABEL: grid_const_escape( ; PTX: { -; PTX-NEXT: .reg .b32 %r<2>; ; PTX-NEXT: .reg .b64 %rd<4>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0; ; PTX-NEXT: cvta.param.u64 %rd3, %rd2; -; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: { // callseq 0, 0 ; PTX-NEXT: .param .b64 param0; -; PTX-NEXT: st.param.b64 [param0], %rd3; ; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: st.param.b64 [param0], %rd3; ; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _); +; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0; -; PTX-NEXT: ld.param.b32 %r1, [retval0]; ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_escape( @@ -153,7 +151,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: .local .align 4 .b8 __local_depot4[4]; ; PTX-NEXT: .reg .b64 %SP; ; PTX-NEXT: .reg .b64 %SPL; -; PTX-NEXT: .reg .b32 %r<3>; +; PTX-NEXT: .reg .b32 %r<2>; ; PTX-NEXT: .reg .b64 %rd<8>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: @@ -167,18 +165,17 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: add.u64 %rd6, %SP, 0; ; PTX-NEXT: add.u64 %rd7, %SPL, 0; ; PTX-NEXT: st.local.b32 [%rd7], %r1; -; PTX-NEXT: mov.b64 %rd1, escape3; ; PTX-NEXT: { // callseq 1, 0 ; PTX-NEXT: .param .b64 param0; -; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b64 param1; -; PTX-NEXT: st.param.b64 [param1], %rd6; ; PTX-NEXT: .param .b64 param2; -; PTX-NEXT: st.param.b64 [param2], %rd4; ; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: st.param.b64 [param2], %rd4; +; PTX-NEXT: st.param.b64 [param1], %rd6; +; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _); +; PTX-NEXT: mov.b64 %rd1, escape3; ; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1; -; PTX-NEXT: ld.param.b32 %r2, [retval0]; ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape( @@ -255,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) { ; PTX-LABEL: grid_const_partial_escape( ; PTX: { -; PTX-NEXT: .reg .b32 %r<4>; +; PTX-NEXT: .reg .b32 %r<3>; ; PTX-NEXT: .reg .b64 %rd<6>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: @@ -266,14 +263,13 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou ; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0]; ; PTX-NEXT: add.s32 %r2, %r1, %r1; ; PTX-NEXT: st.global.b32 [%rd4], %r2; -; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: { // callseq 2, 0 ; PTX-NEXT: .param .b64 param0; -; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _); +; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2; -; PTX-NEXT: ld.param.b32 %r3, [retval0]; ; PTX-NEXT: } // callseq 2 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape( @@ -295,7 +291,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) { ; PTX-LABEL: grid_const_partial_escapemem( ; PTX: { -; PTX-NEXT: .reg .b32 %r<5>; +; PTX-NEXT: .reg .b32 %r<4>; ; PTX-NEXT: .reg .b64 %rd<6>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: @@ -307,14 +303,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4]; ; PTX-NEXT: st.global.b64 [%rd4], %rd5; ; PTX-NEXT: add.s32 %r3, %r1, %r2; -; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: { // callseq 3, 0 ; PTX-NEXT: .param .b64 param0; -; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b32 retval0; +; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _); +; PTX-NEXT: mov.b64 %rd1, escape; ; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3; -; PTX-NEXT: ld.param.b32 %r4, [retval0]; ; PTX-NEXT: } // callseq 3 ; PTX-NEXT: st.param.b32 [func_retval0], %r3; ; PTX-NEXT: ret; @@ -535,9 +530,9 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { ; PTX-NEXT: .reg .b32 %r<2>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0]; ; PTX-NEXT: { // callseq 4, 0 ; PTX-NEXT: .param .align 4 .b8 param0[4]; +; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0]; ; PTX-NEXT: st.param.b32 [param0], %r1; ; PTX-NEXT: call.uni device_func, (param0); ; PTX-NEXT: } // callseq 4 diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll index c165de7..7c029ab 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args.ll @@ -31,7 +31,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 % ; PTX-LABEL: load_alignment( ; PTX: { ; PTX-NEXT: .reg .b32 %r<4>; -; PTX-NEXT: .reg .b64 %rd<7>; +; PTX-NEXT: .reg .b64 %rd<6>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: // %entry ; PTX-NEXT: mov.b64 %rd1, load_alignment_param_0; @@ -45,10 +45,9 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 % ; PTX-NEXT: st.b32 [%rd3], %r3; ; PTX-NEXT: { // callseq 0, 0 ; PTX-NEXT: .param .b64 param0; -; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: .param .b64 retval0; +; PTX-NEXT: st.param.b64 [param0], %rd5; ; PTX-NEXT: call.uni (retval0), escape, (param0); -; PTX-NEXT: ld.param.b64 %rd6, [retval0]; ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; entry: @@ -76,17 +75,16 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) { ; ; PTX-LABEL: load_padding( ; PTX: { -; PTX-NEXT: .reg .b64 %rd<4>; +; PTX-NEXT: .reg .b64 %rd<3>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: ; PTX-NEXT: mov.b64 %rd1, load_padding_param_0; ; PTX-NEXT: cvta.local.u64 %rd2, %rd1; ; PTX-NEXT: { // callseq 1, 0 ; PTX-NEXT: .param .b64 param0; -; PTX-NEXT: st.param.b64 [param0], %rd2; ; PTX-NEXT: .param .b64 retval0; +; PTX-NEXT: st.param.b64 [param0], %rd2; ; PTX-NEXT: call.uni (retval0), escape, (param0); -; PTX-NEXT: ld.param.b64 %rd3, [retval0]; ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; %tmp = call ptr @escape(ptr nonnull align 16 %arg) diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index 4784d70..20a3519 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -911,9 +911,9 @@ define void @device_func(ptr byval(i32) align 4 %input) { ; PTX-NEXT: .reg .b64 %rd<2>; ; PTX-EMPTY: ; PTX-NEXT: // %bb.0: -; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0]; ; PTX-NEXT: { // callseq 3, 0 ; PTX-NEXT: .param .align 4 .b8 param0[4]; +; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0]; ; PTX-NEXT: st.param.b32 [param0], %r1; ; PTX-NEXT: call.uni device_func, (param0); ; PTX-NEXT: } // callseq 3 diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll index 8401f45..b2994c0 100644 --- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll +++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll @@ -8,7 +8,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) { ; CHECK-LABEL: wombat( ; CHECK: { ; CHECK-NEXT: .reg .b32 %r<11>; -; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: // %bb ; CHECK-NEXT: ld.param.b32 %r4, [wombat_param_2]; @@ -19,19 +19,18 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) { ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000; ; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], 0; ; CHECK-NEXT: call.uni (retval0), quux, (param0); -; CHECK-NEXT: ld.param.b64 %rd1, [retval0]; ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3; ; CHECK-NEXT: or.b32 %r8, %r4, %r7; ; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8; -; CHECK-NEXT: cvt.rn.f64.s32 %rd2, %r9; -; CHECK-NEXT: cvt.rn.f64.u32 %rd3, %r10; -; CHECK-NEXT: add.rn.f64 %rd4, %rd3, %rd2; -; CHECK-NEXT: mov.b64 %rd5, 0; -; CHECK-NEXT: st.global.b64 [%rd5], %rd4; +; CHECK-NEXT: cvt.rn.f64.s32 %rd1, %r9; +; CHECK-NEXT: cvt.rn.f64.u32 %rd2, %r10; +; CHECK-NEXT: add.rn.f64 %rd3, %rd2, %rd1; +; CHECK-NEXT: mov.b64 %rd4, 0; +; CHECK-NEXT: st.global.b64 [%rd4], %rd3; ; CHECK-NEXT: mov.b32 %r10, 1; ; CHECK-NEXT: bra.uni $L__BB0_1; bb: diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll index 4fa1235..c5ea9f8 100644 --- a/llvm/test/CodeGen/NVPTX/param-add.ll +++ b/llvm/test/CodeGen/NVPTX/param-add.ll @@ -18,16 +18,16 @@ define i32 @test(%struct.1float alignstack(32) %data) { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_param_0]; -; CHECK-NEXT: shr.u32 %r2, %r1, 8; -; CHECK-NEXT: shr.u32 %r3, %r1, 16; -; CHECK-NEXT: shr.u32 %r4, %r1, 24; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 1 .b8 param0[4]; +; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: st.param.b8 [param0], %r1; +; CHECK-NEXT: shr.u32 %r2, %r1, 8; ; CHECK-NEXT: st.param.b8 [param0+1], %r2; +; CHECK-NEXT: shr.u32 %r3, %r1, 16; ; CHECK-NEXT: st.param.b8 [param0+2], %r3; +; CHECK-NEXT: shr.u32 %r4, %r3, 8; ; CHECK-NEXT: st.param.b8 [param0+3], %r4; -; CHECK-NEXT: .param .b32 retval0; ; CHECK-NEXT: call.uni (retval0), callee, (param0); ; CHECK-NEXT: ld.param.b32 %r5, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll index 6c52bfd..db3fbbc 100644 --- a/llvm/test/CodeGen/NVPTX/param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll @@ -27,10 +27,10 @@ ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1_param_0]; ; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1; ; CHECK: setp.ne.b16 %p1, [[A]], 0 +; CHECK-DAG: .param .b32 param0; +; CHECK-DAG: .param .b32 retval0; ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]] -; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[B]] -; CHECK: .param .b32 retval0; +; CHECK-DAG: st.param.b32 [param0], [[B]] ; CHECK: call.uni (retval0), test_i1, ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R8]]; @@ -47,11 +47,11 @@ define i1 @test_i1(i1 %a) { ; CHECK-NEXT: .param .b32 test_i1s_param_0 ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1s_param_0]; ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; +; CHECK: .param .b32 param0; +; CHECK: .param .b32 retval0; ; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1; ; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]]; -; CHECK: .param .b32 param0; ; CHECK: st.param.b32 [param0], [[A]]; -; CHECK: .param .b32 retval0; ; CHECK: call.uni ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0]; ; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1; @@ -70,9 +70,9 @@ define signext i1 @test_i1s(i1 signext %a) { ; CHECK-DAG: ld.param.b8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2]; ; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v3i1_param_0] ; CHECK: .param .align 1 .b8 param0[1]; +; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK-DAG: st.param.b8 [param0], [[E0]]; ; CHECK-DAG: st.param.b8 [param0+2], [[E2]]; -; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni (retval0), test_v3i1, ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2]; @@ -89,8 +89,8 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) { ; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1] ; CHECK: ld.param.b8 [[E0:%rs[0-9]+]], [test_v4i1_param_0] ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK: st.param.b8 [param0], [[E0]]; ; CHECK: .param .align 1 .b8 retval0[1]; +; CHECK: st.param.b8 [param0], [[E0]]; ; CHECK: call.uni (retval0), test_v4i1, ; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1]; @@ -112,9 +112,9 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) { ; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4]; ; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v5i1_param_0] ; CHECK: .param .align 1 .b8 param0[1]; +; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK-DAG: st.param.b8 [param0], [[E0]]; ; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; -; CHECK: .param .align 1 .b8 retval0[1]; ; CHECK: call.uni (retval0), test_v5i1, ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; @@ -131,8 +131,8 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) { ; CHECK-NEXT: .param .b32 test_i2_param_0 ; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i2_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i2, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -147,8 +147,8 @@ define i2 @test_i2(i2 %a) { ; CHECK-NEXT: .param .b32 test_i3_param_0 ; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i3_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i3, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -163,10 +163,10 @@ define i3 @test_i3(i3 %a) { ; CHECK-LABEL: test_i8( ; CHECK-NEXT: .param .b32 test_i8_param_0 ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0]; -; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[A32]]; ; CHECK: .param .b32 retval0; +; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]]; +; CHECK: st.param.b32 [param0], [[A32]]; ; CHECK: call.uni (retval0), test_i8, ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R32]]; @@ -181,10 +181,10 @@ define i8 @test_i8(i8 %a) { ; CHECK-LABEL: test_i8s( ; CHECK-NEXT: .param .b32 test_i8s_param_0 ; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0]; -; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: .param .b32 retval0; +; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]]; +; CHECK: st.param.b32 [param0], [[A]]; ; CHECK: call.uni (retval0), test_i8s, ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0]; ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ? @@ -202,8 +202,8 @@ define signext i8 @test_i8s(i8 signext %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4] ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v3i8_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0], [[R]] ; CHECK: .param .align 4 .b8 retval0[4]; +; CHECK: st.param.b32 [param0], [[R]] ; CHECK: call.uni (retval0), test_v3i8, ; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0]; ; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very @@ -220,8 +220,8 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4] ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v4i8_param_0] ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0], [[R]]; ; CHECK: .param .align 4 .b8 retval0[4]; +; CHECK: st.param.b32 [param0], [[R]]; ; CHECK: call.uni (retval0), test_v4i8, ; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[RET]]; @@ -237,20 +237,13 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) { ; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v5i8_param_0] ; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK-DAG: st.param.v4.b8 [param0], -; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK-DAG: st.param.b32 [param0], [[E0]]; +; CHECK-DAG: st.param.b8 [param0+4], [[E4]]; ; CHECK: call.uni (retval0), test_v5i8, -; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0]; +; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4]; -; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]]; -; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]]; -; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U; -; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]]; -; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]]; -; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U; -; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U; -; CHECK-DAG: st.param.b32 [func_retval0], [[P2]]; +; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]]; ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i8> @test_v5i8(<5 x i8> %a) { @@ -262,8 +255,8 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) { ; CHECK-LABEL: test_i11( ; CHECK-NEXT: .param .b32 test_i11_param_0 ; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0]; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i11, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -277,10 +270,10 @@ define i11 @test_i11(i11 %a) { ; CHECK-LABEL: test_i16( ; CHECK-NEXT: .param .b32 test_i16_param_0 ; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16_param_0]; -; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: .param .b32 retval0; +; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]]; +; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: call.uni (retval0), test_i16, ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[RE32]]; @@ -294,10 +287,10 @@ define i16 @test_i16(i16 %a) { ; CHECK-LABEL: test_i16s( ; CHECK-NEXT: .param .b32 test_i16s_param_0 ; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16s_param_0]; -; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: .param .b32 retval0; +; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]]; +; CHECK: st.param.b32 [param0], [[E32]]; ; CHECK: call.uni (retval0), test_i16s, ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0]; ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]]; @@ -312,14 +305,15 @@ define signext i16 @test_i16s(i16 signext %a) { ; CHECK-LABEL: test_v3i16( ; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8] ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4]; -; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0]; +; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3i16_param_0]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK-DAG: st.param.b32 [param0], [[E0]]; +; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: call.uni (retval0), test_v3i16, -; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0]; +; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0]; ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4]; +; CHECK-DAG: mov.b32 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [[RE]]; ; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]]; ; CHECK-NEXT: ret; @@ -333,8 +327,8 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) { ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8] ; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0] ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; ; CHECK: call.uni (retval0), test_v4i16, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]} @@ -348,15 +342,15 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) { ; CHECK-LABEL: test_v5i16( ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16] ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8]; -; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0] +; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5i16_param_0] ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; -; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: call.uni (retval0), test_v5i16, -; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0]; +; CHECK-DAG: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8]; -; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} +; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]} ; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]]; ; CHECK-NEXT: ret; define <5 x i16> @test_v5i16(<5 x i16> %a) { @@ -369,8 +363,8 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) { ; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2] ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: .param .align 2 .b8 retval0[2]; +; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: call.uni (retval0), test_f16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]] @@ -385,8 +379,8 @@ define half @test_f16(half %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4] ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: call.uni (retval0), test_v2f16, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]] @@ -401,8 +395,8 @@ define <2 x half> @test_v2f16(<2 x half> %a) { ; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2] ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: .param .align 2 .b8 retval0[2]; +; CHECK: st.param.b16 [param0], [[E]]; ; CHECK: call.uni (retval0), test_bf16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]] @@ -417,8 +411,8 @@ define bfloat @test_bf16(bfloat %a) { ; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4] ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0]; ; CHECK: .param .align 4 .b8 param0[4]; -; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: call.uni (retval0), test_v2bf16, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]] @@ -432,15 +426,16 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) { ; CHECK:.func (.param .align 8 .b8 func_retval0[8]) ; CHECK-LABEL: test_v3f16( ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8] -; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3f16_param_0]; +; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3f16_param_0]; ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]}; -; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK-DAG: st.param.b32 [param0], [[E0]]; +; CHECK-DAG: st.param.b16 [param0+4], [[E2]]; ; CHECK: call.uni (retval0), test_v3f16, -; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0]; +; CHECK-DAG: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4]; +; CHECK-DAG: mov.b32 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [[R]]; ; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]]; ; CHECK: ret; @@ -454,8 +449,8 @@ define <3 x half> @test_v3f16(<3 x half> %a) { ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8] ; CHECK: ld.param.v2.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]}; ; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]}; ; CHECK: call.uni (retval0), test_v4f16, ; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]}; @@ -468,16 +463,16 @@ define <4 x half> @test_v4f16(<4 x half> %a) { ; CHECK:.func (.param .align 16 .b8 func_retval0[16]) ; CHECK-LABEL: test_v5f16( ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0]; +; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5f16_param_0]; ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK-DAG: st.param.v4.b16 [param0], -; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: .param .align 16 .b8 retval0[16]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b16 [param0+8], [[E4]]; ; CHECK: call.uni (retval0), test_v5f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0]; +; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8]; -; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]}; +; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]}; ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]]; ; CHECK: ret; define <5 x half> @test_v5f16(<5 x half> %a) { @@ -490,8 +485,8 @@ define <5 x half> @test_v5f16(<5 x half> %a) { ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16] ; CHECK: ld.param.v4.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0]; ; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]}; ; CHECK: .param .align 16 .b8 retval0[16]; +; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]}; ; CHECK: call.uni (retval0), test_v8f16, ; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]}; @@ -504,20 +499,20 @@ define <8 x half> @test_v8f16(<8 x half> %a) { ; CHECK:.func (.param .align 32 .b8 func_retval0[32]) ; CHECK-LABEL: test_v9f16( ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32] -; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0]; -; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8]; +; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v9f16_param_0]; +; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v9f16_param_0+8]; ; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16]; ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK-DAG: st.param.v4.b16 [param0], -; CHECK-DAG: st.param.v4.b16 [param0+8], -; CHECK-DAG: st.param.b16 [param0+16], [[E8]]; ; CHECK: .param .align 32 .b8 retval0[32]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]}; +; CHECK-DAG: st.param.b16 [param0+16], [[E8]]; ; CHECK: call.uni (retval0), test_v9f16, -; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0]; -; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8]; +; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0]; +; CHECK-DAG: ld.param.v2.b32 {[[R2:%r[0-9]+]], [[R3:%r[0-9]+]]}, [retval0+8]; ; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16]; -; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]}; -; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]}; +; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]}; +; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[R2]], [[R3]]}; ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]]; ; CHECK: ret; define <9 x half> @test_v9f16(<9 x half> %a) { @@ -531,8 +526,8 @@ define <9 x half> @test_v9f16(<9 x half> %a) { ; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i19_param_0]; ; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i19_param_0+2]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i19, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -548,8 +543,8 @@ define i19 @test_i19(i19 %a) { ; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i23_param_0]; ; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i23_param_0+2]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i23, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -565,8 +560,8 @@ define i23 @test_i23(i23 %a) { ; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i24_param_0+2]; ; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i24_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i24, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -581,8 +576,8 @@ define i24 @test_i24(i24 %a) { ; CHECK-NEXT: .param .b32 test_i29_param_0 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [test_i29_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), test_i29, ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}}; @@ -597,8 +592,8 @@ define i29 @test_i29(i29 %a) { ; CHECK-NEXT: .param .b32 test_i32_param_0 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_i32_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: call.uni (retval0), test_i32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -613,10 +608,10 @@ define i32 @test_i32(i32 %a) { ; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16] ; CHECK-DAG: ld.param.b32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8]; ; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0]; -; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.b32 [param0+8], [[E2]]; -; CHECK: .param .align 16 .b8 retval0[16]; +; CHECK-DAG: .param .align 16 .b8 param0[16]; +; CHECK-DAG: .param .align 16 .b8 retval0[16]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; ; CHECK: call.uni (retval0), test_v3i32, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; @@ -632,9 +627,9 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) { ; CHECK-LABEL: test_v4i32( ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16] ; CHECK: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0] -; CHECK: .param .align 16 .b8 param0[16]; -; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; -; CHECK: .param .align 16 .b8 retval0[16]; +; CHECK-DAG: .param .align 16 .b8 param0[16]; +; CHECK-DAG: .param .align 16 .b8 retval0[16]; +; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK: call.uni (retval0), test_v4i32, ; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0]; ; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]} @@ -650,9 +645,9 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) { ; CHECK-DAG: ld.param.b32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16]; ; CHECK-DAG: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0] ; CHECK: .param .align 32 .b8 param0[32]; +; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]}; ; CHECK-DAG: st.param.b32 [param0+16], [[E4]]; -; CHECK: .param .align 32 .b8 retval0[32]; ; CHECK: call.uni (retval0), test_v5i32, ; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0]; ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16]; @@ -669,8 +664,8 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) { ; CHECK-NEXT: .param .b32 test_f32_param_0 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_f32_param_0]; ; CHECK: .param .b32 param0; -; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .b32 retval0; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: call.uni (retval0), test_f32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -686,8 +681,8 @@ define float @test_f32(float %a) { ; CHECK-DAG: ld.param.b8 {{%rd[0-9]+}}, [test_i40_param_0+4]; ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i40_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), test_i40, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; @@ -703,8 +698,8 @@ define i40 @test_i40(i40 %a) { ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i47_param_0+4]; ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i47_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), test_i47, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; @@ -720,8 +715,8 @@ define i47 @test_i47(i47 %a) { ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i48_param_0+4]; ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i48_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), test_i48, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; @@ -738,8 +733,8 @@ define i48 @test_i48(i48 %a) { ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i51_param_0+4]; ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i51_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), test_i51, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; @@ -756,8 +751,8 @@ define i51 @test_i51(i51 %a) { ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i56_param_0+4]; ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i56_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), test_i56, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; @@ -772,8 +767,8 @@ define i56 @test_i56(i56 %a) { ; CHECK-NEXT: .param .b64 test_i57_param_0 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [test_i57_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), test_i57, ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}}; @@ -788,8 +783,8 @@ define i57 @test_i57(i57 %a) { ; CHECK-NEXT: .param .b64 test_i64_param_0 ; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_i64_param_0]; ; CHECK: .param .b64 param0; -; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: .param .b64 retval0; +; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: call.uni (retval0), test_i64, ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; ; CHECK: st.param.b64 [func_retval0], [[R]]; @@ -805,9 +800,9 @@ define i64 @test_i64(i64 %a) { ; CHECK-DAG: ld.param.b64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16]; ; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0]; ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.b64 [param0+16], [[E2]]; ; CHECK: .param .align 32 .b8 retval0[32]; +; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b64 [param0+16], [[E2]]; ; CHECK: call.uni (retval0), test_v3i64, ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; ; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16]; @@ -828,9 +823,9 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) { ; CHECK-DAG: ld.param.v2.b64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16]; ; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0]; ; CHECK: .param .align 32 .b8 param0[32]; -; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]}; ; CHECK: .param .align 32 .b8 retval0[32]; +; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]}; ; CHECK: call.uni (retval0), test_v4i64, ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0]; ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16]; @@ -849,8 +844,8 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) { ; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1] ; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i1_param_0]; ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK: st.param.b8 [param0], [[A]] ; CHECK: .param .align 1 .b8 retval0[1]; +; CHECK: st.param.b8 [param0], [[A]] ; CHECK: call.uni (retval0), test_s_i1, ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b8 [func_retval0], [[R]]; @@ -865,8 +860,8 @@ define %s_i1 @test_s_i1(%s_i1 %a) { ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1] ; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i8_param_0]; ; CHECK: .param .align 1 .b8 param0[1]; -; CHECK: st.param.b8 [param0], [[A]] ; CHECK: .param .align 1 .b8 retval0[1]; +; CHECK: st.param.b8 [param0], [[A]] ; CHECK: call.uni (retval0), test_s_i8, ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b8 [func_retval0], [[R]]; @@ -881,8 +876,8 @@ define %s_i8 @test_s_i8(%s_i8 %a) { ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2] ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_i16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; +; CHECK: st.param.b16 [param0], [[A]] ; CHECK: call.uni (retval0), test_s_i16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]]; @@ -897,8 +892,8 @@ define %s_i16 @test_s_i16(%s_i16 %a) { ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2] ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0]; ; CHECK: .param .align 2 .b8 param0[2]; -; CHECK: st.param.b16 [param0], [[A]] ; CHECK: .param .align 2 .b8 retval0[2]; +; CHECK: st.param.b16 [param0], [[A]] ; CHECK: call.uni (retval0), test_s_f16, ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0]; ; CHECK: st.param.b16 [func_retval0], [[R]]; @@ -913,8 +908,8 @@ define %s_f16 @test_s_f16(%s_f16 %a) { ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4] ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_i32_param_0]; ; CHECK: .param .align 4 .b8 param0[4] -; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: call.uni (retval0), test_s_i32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -929,8 +924,8 @@ define %s_i32 @test_s_i32(%s_i32 %a) { ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4] ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_f32_param_0]; ; CHECK: .param .align 4 .b8 param0[4] -; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: .param .align 4 .b8 retval0[4]; +; CHECK: st.param.b32 [param0], [[E]]; ; CHECK: call.uni (retval0), test_s_f32, ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0]; ; CHECK: st.param.b32 [func_retval0], [[R]]; @@ -945,8 +940,8 @@ define %s_f32 @test_s_f32(%s_f32 %a) { ; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8] ; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_s_i64_param_0]; ; CHECK: .param .align 8 .b8 param0[8]; -; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: .param .align 8 .b8 retval0[8]; +; CHECK: st.param.b64 [param0], [[E]]; ; CHECK: call.uni (retval0), test_s_i64, ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0]; ; CHECK: st.param.b64 [func_retval0], [[R]]; @@ -966,12 +961,12 @@ define %s_i64 @test_s_i64(%s_i64 %a) { ; CHECK-DAG: ld.param.b32 [[E1:%r[0-9]+]], [test_s_i32f32_param_0+4]; ; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0]; ; CHECK: .param .align 8 .b8 param0[24]; +; CHECK: .param .align 8 .b8 retval0[24]; ; CHECK-DAG: st.param.b32 [param0], [[E0]]; ; CHECK-DAG: st.param.b32 [param0+4], [[E1]]; ; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; ; CHECK-DAG: st.param.b32 [param0+12], [[E3]]; ; CHECK-DAG: st.param.b64 [param0+16], [[E4]]; -; CHECK: .param .align 8 .b8 retval0[24]; ; CHECK: call.uni (retval0), test_s_i32f32, ; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0]; ; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4]; @@ -997,10 +992,10 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) { ; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8]; ; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0]; ; CHECK: .param .align 8 .b8 param0[24]; -; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]}; -; CHECK: st.param.b64 [param0+16], [[E4]]; ; CHECK: .param .align 8 .b8 retval0[24]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]}; +; CHECK-DAG: st.param.b64 [param0+16], [[E4]]; ; CHECK: call.uni (retval0), test_s_i32x4, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8]; @@ -1024,16 +1019,13 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) { ; CHECK: ld.param.b8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8]; ; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0]; ; CHECK: .param .align 8 .b8 param0[32]; -; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.b8 [param0+8], [[E2]]; -; CHECK: st.param.b32 [param0+12], [[E3]]; -; CHECK: st.param.b32 [param0+16], [[E4]]; -; CHECK: st.param.b64 [param0+24], [[E5]]; ; CHECK: .param .align 8 .b8 retval0[32]; -; CHECK: call.uni (retval0), test_s_i1i32x4, -; CHECK: ( -; CHECK: param0 -; CHECK: ); +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b8 [param0+8], [[E2]]; +; CHECK-DAG: st.param.b32 [param0+12], [[E3]]; +; CHECK-DAG: st.param.b32 [param0+16], [[E4]]; +; CHECK-DAG: st.param.b64 [param0+24], [[E5]]; +; CHECK: call.uni (retval0), test_s_i1i32x4, (param0); ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8]; ; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12]; @@ -1082,6 +1074,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { ; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0+1]; ; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0]; ; CHECK: .param .align 1 .b8 param0[25]; +; CHECK: .param .align 1 .b8 retval0[25]; ; CHECK-DAG: st.param.b8 [param0], ; CHECK-DAG: st.param.b8 [param0+1], ; CHECK-DAG: st.param.b8 [param0+2], @@ -1107,33 +1100,32 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) { ; CHECK-DAG: st.param.b8 [param0+22], ; CHECK-DAG: st.param.b8 [param0+23], ; CHECK-DAG: st.param.b8 [param0+24], -; CHECK: .param .align 1 .b8 retval0[25]; -; CHECK: call.uni (retval0), test_s_i1i32x4p, -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+4]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+5]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+6]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+7]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+9]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+10]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+11]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+12]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+13]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+14]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+15]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+16]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+17]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+18]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+19]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+20]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+21]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+22]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24]; +; CHECK: call.uni (retval0), test_s_i1i32x4p, (param0); +; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+3]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+2]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+1]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+7]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+6]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+5]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+4]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+12]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+11]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+10]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+9]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+16]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+15]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+14]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+13]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+24]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+23]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+22]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+21]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+20]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+19]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+18]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+17]; ; CHECK: } // callseq ; CHECK-DAG: st.param.b8 [func_retval0], ; CHECK-DAG: st.param.b8 [func_retval0+1], @@ -1177,13 +1169,13 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) { ; CHECK: ld.param.b32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8]; ; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0]; ; CHECK: .param .align 16 .b8 param0[80]; -; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.b32 [param0+8], [[E2]]; -; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]}; -; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]}; -; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]}; -; CHECK: st.param.b32 [param0+64], [[E15]]; ; CHECK: .param .align 16 .b8 retval0[80]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; +; CHECK-DAG: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]}; +; CHECK-DAG: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]}; +; CHECK-DAG: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]}; +; CHECK-DAG: st.param.b32 [param0+64], [[E15]]; ; CHECK: call.uni (retval0), test_s_crossfield, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll index 88ad0b0..2155fb4 100644 --- a/llvm/test/CodeGen/NVPTX/param-overalign.ll +++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll @@ -28,8 +28,8 @@ define float @caller_md(float %a, float %b) { ; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: call.uni (retval0), callee_md, (param0); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -69,8 +69,8 @@ define float @caller(float %a, float %b) { ; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: call.uni (retval0), callee, (param0); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 1 diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll index a480984a..a592b82 100644 --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -84,8 +84,8 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x1_param_1 ; CHECK: ) ; CHECK: .param .b32 param0; - ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[4]; + ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), callee_St4x1, (param0); ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; %1 = load i32, ptr %in, align 4 @@ -112,8 +112,8 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[8]; - ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[8]; + ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x2, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %agg.tmp = alloca %struct.St4x2, align 8 @@ -149,9 +149,9 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[12]; + ; CHECK: .param .align 16 .b8 retval0[12]; ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[12]; ; CHECK: call.uni (retval0), callee_St4x3, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8]; @@ -193,8 +193,8 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x4, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2 @@ -239,9 +239,9 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x5_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[20]; + ; CHECK: .param .align 16 .b8 retval0[20]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[20]; ; CHECK: call.uni (retval0), callee_St4x5, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16]; @@ -295,9 +295,9 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x6_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; + ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: call.uni (retval0), callee_St4x6, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -357,10 +357,10 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x7_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[28]; + ; CHECK: .param .align 16 .b8 retval0[28]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[28]; ; CHECK: call.uni (retval0), callee_St4x7, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -429,9 +429,9 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x8_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; + ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x8, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -503,8 +503,8 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x1_param_1 ; CHECK: ) ; CHECK: .param .b64 param0; - ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[8]; + ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), callee_St8x1, (param0); ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; %1 = load i64, ptr %in, align 8 @@ -531,8 +531,8 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; + ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St8x2, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2 @@ -565,9 +565,9 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; + ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: call.uni (retval0), callee_St8x3, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16]; @@ -609,9 +609,9 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; - ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; - ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; + ; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; + ; CHECK-DAG: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St8x4, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16]; diff --git a/llvm/test/CodeGen/NVPTX/pr126337.ll b/llvm/test/CodeGen/NVPTX/pr126337.ll index 32e4115..95258f7 100644 --- a/llvm/test/CodeGen/NVPTX/pr126337.ll +++ b/llvm/test/CodeGen/NVPTX/pr126337.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_70 | FileCheck %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas-verify %} +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_70 | %ptxas -arch=sm_70 -c - %} ; This IR should compile without triggering assertions in LICM ; when the CopyToReg from %0 in the first BB gets eliminated diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir index 5d0d6f6..4a53152 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir @@ -77,7 +77,7 @@ constants: [] machineFunctionInfo: {} body: | bb.0: - %0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0 + %0:b32, %1:b32, %2:b32, %3:b32 = LDV_i32_v4 0, 0, 101, 3, 32, &retval0, 0 :: (load (s128), addrspace 101) ; CHECK-NOT: ProxyReg %4:b32 = ProxyRegB32 killed %0 %5:b32 = ProxyRegB32 killed %1 @@ -86,7 +86,7 @@ body: | ; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3 STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101) - %8:b32 = LoadParamMemI32 0 + %8:b32 = LD_i32 0, 0, 101, 3, 32, &retval0, 0 :: (load (s32), addrspace 101) ; CHECK-NOT: ProxyReg %9:b32 = ProxyRegB32 killed %8 %10:b32 = ProxyRegB32 killed %9 diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll index 6aa1119..f90435a 100644 --- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll +++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll @@ -26,8 +26,8 @@ define void @st_param_i8_i16() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[4]; -; CHECK-NEXT: st.param.b8 [param0], 1; ; CHECK-NEXT: st.param.b16 [param0+2], 2; +; CHECK-NEXT: st.param.b8 [param0], 1; ; CHECK-NEXT: call.uni call_i8_i16, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; @@ -75,7 +75,7 @@ define void @st_param_f32() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .b32 param0; -; CHECK-NEXT: st.param.b32 [param0], 0f40A00000; +; CHECK-NEXT: st.param.b32 [param0], 1084227584; ; CHECK-NEXT: call.uni call_f32, (param0); ; CHECK-NEXT: } // callseq 3 ; CHECK-NEXT: ret; @@ -91,7 +91,7 @@ define void @st_param_f64() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 4, 0 ; CHECK-NEXT: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000; +; CHECK-NEXT: st.param.b64 [param0], 4618441417868443648; ; CHECK-NEXT: call.uni call_f64, (param0); ; CHECK-NEXT: } // callseq 4 ; CHECK-NEXT: ret; @@ -165,7 +165,7 @@ define void @st_param_v2_i16_ii() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 8, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2}; +; CHECK-NEXT: st.param.b32 [param0], 131073; ; CHECK-NEXT: call.uni call_v2_i16, (param0); ; CHECK-NEXT: } // callseq 8 ; CHECK-NEXT: ret; @@ -432,7 +432,7 @@ define void @st_param_v4_i8_iiii() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 23, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4}; +; CHECK-NEXT: st.param.b32 [param0], 67305985; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 23 ; CHECK-NEXT: ret; @@ -442,15 +442,18 @@ define void @st_param_v4_i8_iiii() { define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_irrr( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irrr_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irrr_param_2]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_irrr_param_1]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_irrr_param_0]; +; CHECK-NEXT: prmt.b32 %r5, 1, %r4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U; ; CHECK-NEXT: { // callseq 24, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs3, %rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 24 ; CHECK-NEXT: ret; @@ -464,15 +467,18 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) { define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_rirr( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rirr_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rirr_param_2]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rirr_param_1]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rirr_param_0]; +; CHECK-NEXT: prmt.b32 %r5, %r4, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U; ; CHECK-NEXT: { // callseq 25, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, 2, %rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 25 ; CHECK-NEXT: ret; @@ -486,15 +492,18 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) { define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_rrir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrir_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrir_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrir_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrir_param_2]; +; CHECK-NEXT: prmt.b32 %r5, 3, %r4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U; ; CHECK-NEXT: { // callseq 26, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, 3, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 26 ; CHECK-NEXT: ret; @@ -508,15 +517,18 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) { define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: st_param_v4_i8_rrri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrri_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrri_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrri_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrri_param_2]; +; CHECK-NEXT: prmt.b32 %r5, %r4, 4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U; ; CHECK-NEXT: { // callseq 27, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, %rs1, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 27 ; CHECK-NEXT: ret; @@ -530,14 +542,16 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) { define void @st_param_v4_i8_iirr(i8 %c, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_iirr( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iirr_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_iirr_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r4, 513, %r3, 0x5410U; ; CHECK-NEXT: { // callseq 28, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r4; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 28 ; CHECK-NEXT: ret; @@ -551,14 +565,17 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) { define void @st_param_v4_i8_irir(i8 %b, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_irir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irir_param_1]; +; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irir_param_0]; +; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 29, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, 3, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 29 ; CHECK-NEXT: ret; @@ -572,14 +589,17 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) { define void @st_param_v4_i8_irri(i8 %b, i8 %c) { ; CHECK-LABEL: st_param_v4_i8_irri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irri_param_1]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irri_param_0]; +; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 30, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, %rs1, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 30 ; CHECK-NEXT: ret; @@ -593,14 +613,17 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) { define void @st_param_v4_i8_riir(i8 %a, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_riir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riir_param_1]; +; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riir_param_0]; +; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 31, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, 3, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 31 ; CHECK-NEXT: ret; @@ -614,14 +637,17 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) { define void @st_param_v4_i8_riri(i8 %a, i8 %c) { ; CHECK-LABEL: st_param_v4_i8_riri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riri_param_1]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riri_param_0]; +; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 32, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, %rs1, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 32 ; CHECK-NEXT: ret; @@ -635,14 +661,16 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) { define void @st_param_v4_i8_rrii(i8 %a, i8 %b) { ; CHECK-LABEL: st_param_v4_i8_rrii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrii_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrii_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r4, %r3, 1027, 0x5410U; ; CHECK-NEXT: { // callseq 33, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, %rs1, 3, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r4; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 33 ; CHECK-NEXT: ret; @@ -656,13 +684,15 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) { define void @st_param_v4_i8_iiir(i8 %d) { ; CHECK-LABEL: st_param_v4_i8_iiir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiir_param_0]; ; CHECK-NEXT: { // callseq 34, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiir_param_0]; +; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 34 ; CHECK-NEXT: ret; @@ -676,13 +706,15 @@ define void @st_param_v4_i8_iiir(i8 %d) { define void @st_param_v4_i8_iiri(i8 %c) { ; CHECK-LABEL: st_param_v4_i8_iiri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiri_param_0]; ; CHECK-NEXT: { // callseq 35, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiri_param_0]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 35 ; CHECK-NEXT: ret; @@ -696,13 +728,15 @@ define void @st_param_v4_i8_iiri(i8 %c) { define void @st_param_v4_i8_irii(i8 %b) { ; CHECK-LABEL: st_param_v4_i8_irii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irii_param_0]; ; CHECK-NEXT: { // callseq 36, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irii_param_0]; +; CHECK-NEXT: prmt.b32 %r2, 1, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 36 ; CHECK-NEXT: ret; @@ -716,13 +750,15 @@ define void @st_param_v4_i8_irii(i8 %b) { define void @st_param_v4_i8_riii(i8 %a) { ; CHECK-LABEL: st_param_v4_i8_riii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riii_param_0]; ; CHECK-NEXT: { // callseq 37, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riii_param_0]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 37 ; CHECK-NEXT: ret; @@ -742,7 +778,7 @@ define void @st_param_v4_i16_iiii() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 38, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 38 ; CHECK-NEXT: ret; @@ -841,13 +877,15 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) { ; CHECK-LABEL: st_param_v4_i16_iirr( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iirr_param_0]; ; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_iirr_param_1]; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 43, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 43 ; CHECK-NEXT: ret; @@ -946,13 +984,15 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) { ; CHECK-LABEL: st_param_v4_i16_rrii( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_rrii_param_0]; ; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_rrii_param_1]; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 48, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 48 ; CHECK-NEXT: ret; @@ -966,13 +1006,16 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) { define void @st_param_v4_i16_iiir(i16 %d) { ; CHECK-LABEL: st_param_v4_i16_iiir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiir_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 3; +; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; ; CHECK-NEXT: { // callseq 49, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 49 ; CHECK-NEXT: ret; @@ -986,13 +1029,16 @@ define void @st_param_v4_i16_iiir(i16 %d) { define void @st_param_v4_i16_iiri(i16 %c) { ; CHECK-LABEL: st_param_v4_i16_iiri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiri_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 4; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 50, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 50 ; CHECK-NEXT: ret; @@ -1006,13 +1052,16 @@ define void @st_param_v4_i16_iiri(i16 %c) { define void @st_param_v4_i16_irii(i16 %b) { ; CHECK-LABEL: st_param_v4_i16_irii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_irii_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 1; +; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; ; CHECK-NEXT: { // callseq 51, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 51 ; CHECK-NEXT: ret; @@ -1026,13 +1075,16 @@ define void @st_param_v4_i16_irii(i16 %b) { define void @st_param_v4_i16_riii(i16 %a) { ; CHECK-LABEL: st_param_v4_i16_riii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_riii_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 2; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 52, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 52 ; CHECK-NEXT: ret; @@ -1672,13 +1724,12 @@ declare void @call_v4_f32(%struct.float4 alignstack(16)) define void @st_param_bfloat() { ; CHECK-LABEL: st_param_bfloat( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0x4100; ; CHECK-NEXT: { // callseq 83, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[2]; -; CHECK-NEXT: st.param.b16 [param0], %rs1; +; CHECK-NEXT: st.param.b16 [param0], 0x4100; ; CHECK-NEXT: call.uni call_bfloat, (param0); ; CHECK-NEXT: } // callseq 83 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll index 5b31b5e..c8ca6b6 100644 --- a/llvm/test/CodeGen/NVPTX/store-undef.ll +++ b/llvm/test/CodeGen/NVPTX/store-undef.ll @@ -34,9 +34,9 @@ define void @test_store_param_def(i64 %param0, i32 %param1) { ; CHECK-NEXT: ld.param.b32 %r1, [test_store_param_def_param_1]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[32]; +; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r2, %r1, %r3, %r4}; +; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r5, %r1}; ; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1}; -; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5}; ; CHECK-NEXT: call.uni test_call, (param0); ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/tanhf.ll b/llvm/test/CodeGen/NVPTX/tanhf.ll new file mode 100644 index 0000000..6f4eb22 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/tanhf.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck %s +; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %} + +target triple = "nvptx64-nvidia-cuda" + +define float @test1(float %in) local_unnamed_addr { +; CHECK-LABEL: test1( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [test1_param_0]; +; CHECK-NEXT: tanh.approx.f32 %r2, %r1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r2; +; CHECK-NEXT: ret; + %call = call afn float @llvm.tanh.f32(float %in) + ret float %call +} + +define half @test2(half %in) local_unnamed_addr { +; CHECK-LABEL: test2( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [test2_param_0]; +; CHECK-NEXT: cvt.f32.f16 %r1, %rs1; +; CHECK-NEXT: tanh.approx.f32 %r2, %r1; +; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r2; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs2; +; CHECK-NEXT: ret; + %call = call afn half @llvm.tanh.f16(half %in) + ret half %call +} + +declare float @llvm.tanh.f32(float) +declare half @llvm.tanh.f16(half) + diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll index d6961a9..3138d7c 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -69,8 +69,8 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) { ; CHECK-NEXT: tex.1d.v4.f32.s32 {%r2, %r3, %r4, %r5}, [tex0, {%r1}]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], %rd3; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd3; ; CHECK-NEXT: call.uni (retval0), texfunc, (param0); ; CHECK-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll index 87e46b1..697eb90 100644 --- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; Verifies correctness of load/store of parameters and return values. -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | %ptxas-verify %} %s_i8i16p = type { <{ i16, i8, i16 }>, i64 } %s_i8i32p = type { <{ i32, i8, i32 }>, i64 } @@ -24,37 +24,35 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { ; CHECK-LABEL: test_s_i8i16p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<15>; +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4]; -; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; -; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3]; -; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; +; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i16p_param_0]; ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8]; -; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2]; -; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i16p_param_0+4]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-NEXT: st.param.b16 [param0], %rs1; -; CHECK-NEXT: st.param.b8 [param0+2], %rs2; -; CHECK-NEXT: st.param.b8 [param0+3], %rs3; -; CHECK-NEXT: st.param.b8 [param0+4], %rs4; -; CHECK-NEXT: st.param.b64 [param0+8], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: st.param.b8 [param0+4], %rs1; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0); -; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+2]; +; CHECK-NEXT: ld.param.b16 %rs3, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+3]; ; CHECK-NEXT: } // callseq 0 -; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; +; CHECK-NEXT: shl.b16 %rs8, %rs4, 8; +; CHECK-NEXT: or.b16 %rs9, %rs8, %rs5; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs5; ; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs2; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-NEXT: shr.u16 %rs12, %rs9, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs12; ; CHECK-NEXT: ret; %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) ret %s_i8i16p %r @@ -64,56 +62,51 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { ; CHECK-LABEL: test_s_i8i32p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<12>; -; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<24>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6]; -; CHECK-NEXT: shl.b32 %r4, %r3, 8; -; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5]; -; CHECK-NEXT: or.b32 %r6, %r4, %r5; -; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7]; -; CHECK-NEXT: shl.b32 %r8, %r7, 16; -; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8]; -; CHECK-NEXT: shl.b32 %r10, %r9, 24; -; CHECK-NEXT: or.b32 %r11, %r10, %r8; -; CHECK-NEXT: or.b32 %r2, %r11, %r6; -; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4]; ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0]; -; CHECK-NEXT: shr.u32 %r12, %r2, 8; -; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i32p_param_0+4]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8i32p_param_0+6]; +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+7]; +; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8i32p_param_0+8]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[24]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b8 [param0+4], %rs1; -; CHECK-NEXT: st.param.b8 [param0+5], %r2; -; CHECK-NEXT: st.param.b8 [param0+6], %r12; -; CHECK-NEXT: st.param.b8 [param0+7], %r13; -; CHECK-NEXT: st.param.b8 [param0+8], %r9; -; CHECK-NEXT: st.param.b64 [param0+16], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: st.param.b8 [param0+8], %r4; +; CHECK-NEXT: st.param.b8 [param0+7], %r3; +; CHECK-NEXT: st.param.b8 [param0+6], %r2; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: st.param.b16 [param0+4], %rs1; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0); -; CHECK-NEXT: ld.param.b32 %r14, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r5, [retval0]; +; CHECK-NEXT: ld.param.b8 %r6, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %r7, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %r8, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %r9, [retval0+5]; ; CHECK-NEXT: } // callseq 1 -; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; -; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; -; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; -; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; -; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; +; CHECK-NEXT: shl.b32 %r12, %r8, 8; +; CHECK-NEXT: or.b32 %r13, %r12, %r9; +; CHECK-NEXT: shl.b32 %r15, %r7, 16; +; CHECK-NEXT: shl.b32 %r17, %r6, 24; +; CHECK-NEXT: or.b32 %r18, %r17, %r15; +; CHECK-NEXT: or.b32 %r19, %r18, %r13; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9; ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: shr.u32 %r21, %r19, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21; +; CHECK-NEXT: shr.u32 %r22, %r19, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22; +; CHECK-NEXT: shr.u32 %r23, %r19, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23; ; CHECK-NEXT: ret; %r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) ret %s_i8i32p %r @@ -123,112 +116,66 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) { ; CHECK-LABEL: test_s_i8i64p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b64 %rd<68>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b64 %rd<46>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10]; -; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; -; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9]; -; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; -; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11]; -; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; -; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12]; -; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; -; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; -; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; -; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14]; -; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; -; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13]; -; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; -; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15]; -; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; -; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16]; -; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; -; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; -; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; -; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; -; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; -; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8]; ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0]; -; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; -; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; -; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; -; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; -; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; -; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; +; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8i64p_param_0+8]; +; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24]; +; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+16]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[32]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.b8 [param0+8], %rs1; -; CHECK-NEXT: st.param.b8 [param0+9], %rd2; -; CHECK-NEXT: st.param.b8 [param0+10], %rd25; -; CHECK-NEXT: st.param.b8 [param0+11], %rd26; -; CHECK-NEXT: st.param.b8 [param0+12], %rd27; -; CHECK-NEXT: st.param.b8 [param0+13], %rd23; -; CHECK-NEXT: st.param.b8 [param0+14], %rd28; -; CHECK-NEXT: st.param.b8 [param0+15], %rd29; -; CHECK-NEXT: st.param.b8 [param0+16], %rd30; -; CHECK-NEXT: st.param.b64 [param0+24], %rd3; ; CHECK-NEXT: .param .align 8 .b8 retval0[32]; +; CHECK-NEXT: st.param.b8 [param0+16], %rd4; +; CHECK-NEXT: st.param.b64 [param0+24], %rd3; +; CHECK-NEXT: st.param.b64 [param0+8], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0); -; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; -; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; -; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; +; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24]; +; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd6, [retval0]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13]; +; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12]; +; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11]; +; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10]; +; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9]; ; CHECK-NEXT: } // callseq 2 -; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3; -; CHECK-NEXT: and.b64 %rd34, %rd33, 255; -; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4; -; CHECK-NEXT: and.b64 %rd36, %rd35, 255; -; CHECK-NEXT: shl.b64 %rd37, %rd36, 8; -; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37; -; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5; -; CHECK-NEXT: and.b64 %rd40, %rd39, 255; -; CHECK-NEXT: shl.b64 %rd41, %rd40, 16; -; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41; -; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6; -; CHECK-NEXT: and.b64 %rd44, %rd43, 255; -; CHECK-NEXT: shl.b64 %rd45, %rd44, 24; -; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45; -; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7; -; CHECK-NEXT: and.b64 %rd48, %rd47, 255; -; CHECK-NEXT: shl.b64 %rd49, %rd48, 32; -; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49; -; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8; -; CHECK-NEXT: and.b64 %rd52, %rd51, 255; -; CHECK-NEXT: shl.b64 %rd53, %rd52, 40; -; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53; -; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9; -; CHECK-NEXT: and.b64 %rd56, %rd55, 255; -; CHECK-NEXT: shl.b64 %rd57, %rd56, 48; -; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57; -; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10; -; CHECK-NEXT: shl.b64 %rd60, %rd59, 56; -; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; +; CHECK-NEXT: shl.b64 %rd17, %rd13, 8; +; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14; +; CHECK-NEXT: shl.b64 %rd20, %rd12, 16; +; CHECK-NEXT: shl.b64 %rd22, %rd11, 24; +; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18; +; CHECK-NEXT: shl.b64 %rd27, %rd9, 8; +; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10; +; CHECK-NEXT: shl.b64 %rd30, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd32, %rd7, 24; +; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30; +; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28; +; CHECK-NEXT: shl.b64 %rd35, %rd34, 32; +; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24; +; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14; +; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; +; CHECK-NEXT: shr.u64 %rd39, %rd36, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39; +; CHECK-NEXT: shr.u64 %rd40, %rd36, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40; +; CHECK-NEXT: shr.u64 %rd41, %rd36, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41; +; CHECK-NEXT: shr.u64 %rd42, %rd36, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42; +; CHECK-NEXT: shr.u64 %rd43, %rd36, 24; ; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43; -; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39; -; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35; -; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33; -; CHECK-NEXT: shr.u64 %rd64, %rd50, 32; -; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; -; CHECK-NEXT: shr.u64 %rd65, %rd54, 40; -; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; -; CHECK-NEXT: shr.u64 %rd66, %rd58, 48; -; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; -; CHECK-NEXT: shr.u64 %rd67, %rd61, 56; -; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67; -; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; +; CHECK-NEXT: shr.u64 %rd44, %rd36, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44; +; CHECK-NEXT: shr.u64 %rd45, %rd36, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45; ; CHECK-NEXT: ret; %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) ret %s_i8i64p %r @@ -242,33 +189,32 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4]; -; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; -; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3]; -; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; -; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8]; -; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2]; ; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [test_s_i8f16p_param_0+2]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8]; +; CHECK-NEXT: ld.param.b8 %rs3, [test_s_i8f16p_param_0+4]; ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-NEXT: st.param.b16 [param0], %rs1; -; CHECK-NEXT: st.param.b8 [param0+2], %rs2; -; CHECK-NEXT: st.param.b8 [param0+3], %rs3; -; CHECK-NEXT: st.param.b8 [param0+4], %rs4; -; CHECK-NEXT: st.param.b64 [param0+8], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: st.param.b8 [param0+4], %rs3; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: st.param.b16 [param0+2], %rs2; +; CHECK-NEXT: st.param.b16 [param0], %rs1; ; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0); -; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+2]; +; CHECK-NEXT: ld.param.b16 %rs5, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs7, [retval0+3]; ; CHECK-NEXT: } // callseq 3 -; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; +; CHECK-NEXT: shl.b16 %rs10, %rs6, 8; +; CHECK-NEXT: or.b16 %rs11, %rs10, %rs7; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs7; ; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs4; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs5; +; CHECK-NEXT: shr.u16 %rs14, %rs11, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs14; ; CHECK-NEXT: ret; %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) ret %s_i8f16p %r @@ -278,56 +224,51 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) { ; CHECK-LABEL: test_s_i8f16x2p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<12>; -; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<24>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6]; -; CHECK-NEXT: shl.b32 %r4, %r3, 8; -; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5]; -; CHECK-NEXT: or.b32 %r6, %r4, %r5; -; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7]; -; CHECK-NEXT: shl.b32 %r8, %r7, 16; -; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8]; -; CHECK-NEXT: shl.b32 %r10, %r9, 24; -; CHECK-NEXT: or.b32 %r11, %r10, %r8; -; CHECK-NEXT: or.b32 %r2, %r11, %r6; -; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4]; ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0]; -; CHECK-NEXT: shr.u32 %r12, %r2, 8; -; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16x2p_param_0+4]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f16x2p_param_0+6]; +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+7]; +; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f16x2p_param_0+8]; ; CHECK-NEXT: { // callseq 4, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[24]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b8 [param0+4], %rs1; -; CHECK-NEXT: st.param.b8 [param0+5], %r2; -; CHECK-NEXT: st.param.b8 [param0+6], %r12; -; CHECK-NEXT: st.param.b8 [param0+7], %r13; -; CHECK-NEXT: st.param.b8 [param0+8], %r9; -; CHECK-NEXT: st.param.b64 [param0+16], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: st.param.b8 [param0+8], %r4; +; CHECK-NEXT: st.param.b8 [param0+7], %r3; +; CHECK-NEXT: st.param.b8 [param0+6], %r2; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: st.param.b16 [param0+4], %rs1; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0); -; CHECK-NEXT: ld.param.b32 %r14, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r5, [retval0]; +; CHECK-NEXT: ld.param.b8 %r6, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %r7, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %r8, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %r9, [retval0+5]; ; CHECK-NEXT: } // callseq 4 -; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; -; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; -; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; -; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; -; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; +; CHECK-NEXT: shl.b32 %r12, %r8, 8; +; CHECK-NEXT: or.b32 %r13, %r12, %r9; +; CHECK-NEXT: shl.b32 %r15, %r7, 16; +; CHECK-NEXT: shl.b32 %r17, %r6, 24; +; CHECK-NEXT: or.b32 %r18, %r17, %r15; +; CHECK-NEXT: or.b32 %r19, %r18, %r13; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9; ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: shr.u32 %r21, %r19, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21; +; CHECK-NEXT: shr.u32 %r22, %r19, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22; +; CHECK-NEXT: shr.u32 %r23, %r19, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23; ; CHECK-NEXT: ret; %r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) ret %s_i8f16x2p %r @@ -337,56 +278,51 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) { define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) { ; CHECK-LABEL: test_s_i8f32p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<12>; -; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<24>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6]; -; CHECK-NEXT: shl.b32 %r4, %r3, 8; -; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5]; -; CHECK-NEXT: or.b32 %r6, %r4, %r5; -; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7]; -; CHECK-NEXT: shl.b32 %r8, %r7, 16; -; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8]; -; CHECK-NEXT: shl.b32 %r10, %r9, 24; -; CHECK-NEXT: or.b32 %r11, %r10, %r8; -; CHECK-NEXT: or.b32 %r2, %r11, %r6; -; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4]; ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0]; -; CHECK-NEXT: shr.u32 %r12, %r2, 8; -; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f32p_param_0+4]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f32p_param_0+6]; +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+7]; +; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f32p_param_0+8]; ; CHECK-NEXT: { // callseq 5, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[24]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b8 [param0+4], %rs1; -; CHECK-NEXT: st.param.b8 [param0+5], %r2; -; CHECK-NEXT: st.param.b8 [param0+6], %r12; -; CHECK-NEXT: st.param.b8 [param0+7], %r13; -; CHECK-NEXT: st.param.b8 [param0+8], %r9; -; CHECK-NEXT: st.param.b64 [param0+16], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: st.param.b8 [param0+8], %r4; +; CHECK-NEXT: st.param.b8 [param0+7], %r3; +; CHECK-NEXT: st.param.b8 [param0+6], %r2; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: st.param.b16 [param0+4], %rs1; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0); -; CHECK-NEXT: ld.param.b32 %r14, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r5, [retval0]; +; CHECK-NEXT: ld.param.b8 %r6, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %r7, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %r8, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %r9, [retval0+5]; ; CHECK-NEXT: } // callseq 5 -; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; -; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; -; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; -; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; -; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; +; CHECK-NEXT: shl.b32 %r12, %r8, 8; +; CHECK-NEXT: or.b32 %r13, %r12, %r9; +; CHECK-NEXT: shl.b32 %r15, %r7, 16; +; CHECK-NEXT: shl.b32 %r17, %r6, 24; +; CHECK-NEXT: or.b32 %r18, %r17, %r15; +; CHECK-NEXT: or.b32 %r19, %r18, %r13; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9; ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: shr.u32 %r21, %r19, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21; +; CHECK-NEXT: shr.u32 %r22, %r19, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22; +; CHECK-NEXT: shr.u32 %r23, %r19, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23; ; CHECK-NEXT: ret; %r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) ret %s_i8f32p %r @@ -396,112 +332,66 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) { define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) { ; CHECK-LABEL: test_s_i8f64p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b64 %rd<68>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b64 %rd<46>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10]; -; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; -; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9]; -; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; -; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11]; -; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; -; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12]; -; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; -; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; -; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; -; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14]; -; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; -; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13]; -; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; -; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15]; -; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; -; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16]; -; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; -; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; -; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; -; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; -; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; -; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8]; ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0]; -; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; -; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; -; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; -; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; -; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; -; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; +; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8f64p_param_0+8]; +; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24]; +; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+16]; ; CHECK-NEXT: { // callseq 6, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[32]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.b8 [param0+8], %rs1; -; CHECK-NEXT: st.param.b8 [param0+9], %rd2; -; CHECK-NEXT: st.param.b8 [param0+10], %rd25; -; CHECK-NEXT: st.param.b8 [param0+11], %rd26; -; CHECK-NEXT: st.param.b8 [param0+12], %rd27; -; CHECK-NEXT: st.param.b8 [param0+13], %rd23; -; CHECK-NEXT: st.param.b8 [param0+14], %rd28; -; CHECK-NEXT: st.param.b8 [param0+15], %rd29; -; CHECK-NEXT: st.param.b8 [param0+16], %rd30; -; CHECK-NEXT: st.param.b64 [param0+24], %rd3; ; CHECK-NEXT: .param .align 8 .b8 retval0[32]; +; CHECK-NEXT: st.param.b8 [param0+16], %rd4; +; CHECK-NEXT: st.param.b64 [param0+24], %rd3; +; CHECK-NEXT: st.param.b64 [param0+8], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0); -; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; -; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; -; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; +; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24]; +; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd6, [retval0]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13]; +; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12]; +; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11]; +; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10]; +; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9]; ; CHECK-NEXT: } // callseq 6 -; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3; -; CHECK-NEXT: and.b64 %rd34, %rd33, 255; -; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4; -; CHECK-NEXT: and.b64 %rd36, %rd35, 255; -; CHECK-NEXT: shl.b64 %rd37, %rd36, 8; -; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37; -; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5; -; CHECK-NEXT: and.b64 %rd40, %rd39, 255; -; CHECK-NEXT: shl.b64 %rd41, %rd40, 16; -; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41; -; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6; -; CHECK-NEXT: and.b64 %rd44, %rd43, 255; -; CHECK-NEXT: shl.b64 %rd45, %rd44, 24; -; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45; -; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7; -; CHECK-NEXT: and.b64 %rd48, %rd47, 255; -; CHECK-NEXT: shl.b64 %rd49, %rd48, 32; -; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49; -; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8; -; CHECK-NEXT: and.b64 %rd52, %rd51, 255; -; CHECK-NEXT: shl.b64 %rd53, %rd52, 40; -; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53; -; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9; -; CHECK-NEXT: and.b64 %rd56, %rd55, 255; -; CHECK-NEXT: shl.b64 %rd57, %rd56, 48; -; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57; -; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10; -; CHECK-NEXT: shl.b64 %rd60, %rd59, 56; -; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; +; CHECK-NEXT: shl.b64 %rd17, %rd13, 8; +; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14; +; CHECK-NEXT: shl.b64 %rd20, %rd12, 16; +; CHECK-NEXT: shl.b64 %rd22, %rd11, 24; +; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18; +; CHECK-NEXT: shl.b64 %rd27, %rd9, 8; +; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10; +; CHECK-NEXT: shl.b64 %rd30, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd32, %rd7, 24; +; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30; +; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28; +; CHECK-NEXT: shl.b64 %rd35, %rd34, 32; +; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24; +; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14; +; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; +; CHECK-NEXT: shr.u64 %rd39, %rd36, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39; +; CHECK-NEXT: shr.u64 %rd40, %rd36, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40; +; CHECK-NEXT: shr.u64 %rd41, %rd36, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41; +; CHECK-NEXT: shr.u64 %rd42, %rd36, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42; +; CHECK-NEXT: shr.u64 %rd43, %rd36, 24; ; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43; -; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39; -; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35; -; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33; -; CHECK-NEXT: shr.u64 %rd64, %rd50, 32; -; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; -; CHECK-NEXT: shr.u64 %rd65, %rd54, 40; -; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; -; CHECK-NEXT: shr.u64 %rd66, %rd58, 48; -; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; -; CHECK-NEXT: shr.u64 %rd67, %rd61, 56; -; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67; -; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; +; CHECK-NEXT: shr.u64 %rd44, %rd36, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44; +; CHECK-NEXT: shr.u64 %rd45, %rd36, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45; ; CHECK-NEXT: ret; %r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) ret %s_i8f64p %r diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll index 3ca729f..9e312a2 100644 --- a/llvm/test/CodeGen/NVPTX/vaargs.ll +++ b/llvm/test/CodeGen/NVPTX/vaargs.ll @@ -89,14 +89,14 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) { ; CHECK-NEXT: ld.param.b32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0]; ; Store arguments to an array -; CHECK32: .param .align 8 .b8 param1[28]; -; CHECK64: .param .align 8 .b8 param1[32]; -; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]]; -; CHECK-NEXT: st.param.b64 [param1+8], [[ARG_I64]]; -; CHECK-NEXT: st.param.b64 [param1+16], [[ARG_DOUBLE]]; -; CHECK-NEXT: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]]; -; CHECK-NEXT: .param .b32 retval0; -; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[] +; CHECK32: .param .align 8 .b8 param1[28]; +; CHECK64: .param .align 8 .b8 param1[32]; +; CHECK-DAG: .param .b32 retval0; +; CHECK-DAG: st.param.b32 [param1], [[ARG_I32]]; +; CHECK-DAG: st.param.b64 [param1+8], [[ARG_I64]]; +; CHECK-DAG: st.param.b64 [param1+16], [[ARG_DOUBLE]]; +; CHECK-DAG: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]]; +; CHECK-DAG: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[] entry: %ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8 diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll index ad2e704..a9b3675 100644 --- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll +++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll @@ -115,13 +115,13 @@ define dso_local i32 @foo() { ; CHECK-PTX-NEXT: st.b64 [%SP+16], 1; ; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408; ; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408; -; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; ; CHECK-PTX-NEXT: { // callseq 0, 0 ; CHECK-PTX-NEXT: .param .b32 param0; -; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; ; CHECK-PTX-NEXT: .param .b32 retval0; +; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; +; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-PTX-NEXT: } // callseq 0 @@ -218,13 +218,13 @@ define dso_local i32 @bar() { ; CHECK-PTX-NEXT: st.b32 [%SP+8], 1; ; CHECK-PTX-NEXT: st.b8 [%SP+12], 1; ; CHECK-PTX-NEXT: st.b64 [%SP+16], 1; -; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8; ; CHECK-PTX-NEXT: { // callseq 1, 0 ; CHECK-PTX-NEXT: .param .b32 param0; -; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3; ; CHECK-PTX-NEXT: .param .b32 retval0; +; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3; +; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-PTX-NEXT: } // callseq 1 @@ -289,13 +289,13 @@ define dso_local i32 @baz() { ; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5; ; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL; ; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1}; -; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; ; CHECK-PTX-NEXT: { // callseq 2, 0 ; CHECK-PTX-NEXT: .param .b32 param0; -; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; ; CHECK-PTX-NEXT: .param .b32 retval0; +; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1; +; CHECK-PTX-NEXT: st.param.b32 [param0], 1; ; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1); ; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-PTX-NEXT: } // callseq 2 @@ -348,7 +348,6 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24]; ; CHECK-PTX-NEXT: .reg .b64 %SP; ; CHECK-PTX-NEXT: .reg .b64 %SPL; -; CHECK-PTX-NEXT: .reg .b32 %r<2>; ; CHECK-PTX-NEXT: .reg .b64 %rd<8>; ; CHECK-PTX-EMPTY: ; CHECK-PTX-NEXT: // %bb.0: // %entry @@ -360,18 +359,17 @@ define dso_local void @qux() { ; CHECK-PTX-NEXT: ld.global.nc.b64 %rd4, [__const_$_qux_$_s]; ; CHECK-PTX-NEXT: st.local.b64 [%rd2], %rd4; ; CHECK-PTX-NEXT: st.b64 [%SP+16], 1; -; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd2]; -; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8]; -; CHECK-PTX-NEXT: add.u64 %rd7, %SP, 16; ; CHECK-PTX-NEXT: { // callseq 3, 0 ; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-PTX-NEXT: st.param.b64 [param0], %rd5; -; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6; ; CHECK-PTX-NEXT: .param .b64 param1; -; CHECK-PTX-NEXT: st.param.b64 [param1], %rd7; ; CHECK-PTX-NEXT: .param .b32 retval0; +; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16; +; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5; +; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8]; +; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6; +; CHECK-PTX-NEXT: ld.local.b64 %rd7, [%rd2]; +; CHECK-PTX-NEXT: st.param.b64 [param0], %rd7; ; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1); -; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0]; ; CHECK-PTX-NEXT: } // callseq 3 ; CHECK-PTX-NEXT: ret; entry: diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll index e16fc74..6f0dff7 100644 --- a/llvm/test/CodeGen/NVPTX/vector-loads.ll +++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll @@ -154,7 +154,7 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177 ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<4>; ; CHECK-NEXT: .reg .b32 %r<8>; -; CHECK-NEXT: .reg .b64 %rd<6>; +; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [foo_complex_param_0]; @@ -166,12 +166,11 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177 ; CHECK-NEXT: shl.b32 %r6, %r1, 1; ; CHECK-NEXT: or.b32 %r7, %r5, %r6; ; CHECK-NEXT: cvt.u64.u32 %rd2, %r7; -; CHECK-NEXT: mul.wide.u32 %rd3, %r3, 131072; -; CHECK-NEXT: add.s64 %rd4, %rd1, %rd3; -; CHECK-NEXT: add.s64 %rd5, %rd4, %rd2; -; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd5+128]; +; CHECK-NEXT: mad.wide.u32 %rd3, %r3, 131072, %rd1; +; CHECK-NEXT: add.s64 %rd4, %rd3, %rd2; +; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd4+128]; ; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2; -; CHECK-NEXT: st.b8 [%rd5+129], %rs3; +; CHECK-NEXT: st.b8 [%rd4+129], %rs3; ; CHECK-NEXT: ret; %t0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1 %t1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() |