diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/f16x2-instructions.ll')
-rw-r--r-- | llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 611 |
1 files changed, 359 insertions, 252 deletions
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll index d0e2c18..d4fcea3 100644 --- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll @@ -45,11 +45,12 @@ define <2 x half> @test_ret_const() #0 { define half @test_extract_0(<2 x half> %a) #0 { ; CHECK-LABEL: test_extract_0( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b16 %rs<2>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_0_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_extract_0_param_0]; +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs1, tmp}, %r1; } ; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; %e = extractelement <2 x half> %a, i32 0 @@ -59,12 +60,13 @@ define half @test_extract_0(<2 x half> %a) #0 { define half @test_extract_1(<2 x half> %a) #0 { ; CHECK-LABEL: test_extract_1( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b16 %rs<2>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_1_param_0]; -; CHECK-NEXT: st.param.b16 [func_retval0], %rs2; +; CHECK-NEXT: ld.param.b32 %r1, [test_extract_1_param_0]; +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r1; } +; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; ; CHECK-NEXT: ret; %e = extractelement <2 x half> %a, i32 1 ret half %e @@ -80,8 +82,9 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1]; -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_extract_i_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_extract_i_param_0]; ; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1; ; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; ; CHECK-NEXT: ret; @@ -107,14 +110,16 @@ define <2 x half> @test_fadd(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fadd_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fadd_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: add.rn.f32 %r5, %r4, %r3; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: add.rn.f32 %r8, %r7, %r6; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -143,7 +148,8 @@ define <2 x half> @test_fadd_imm_0(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_imm_0_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_imm_0_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: add.rn.f32 %r3, %r2, 0f40000000; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -175,7 +181,8 @@ define <2 x half> @test_fadd_imm_1(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fadd_imm_1_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fadd_imm_1_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: add.rn.f32 %r3, %r2, 0f40000000; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -207,14 +214,16 @@ define <2 x half> @test_fsub(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fsub_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fsub_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fsub_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fsub_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: sub.rn.f32 %r5, %r4, %r3; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: sub.rn.f32 %r8, %r7, %r6; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -242,7 +251,8 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<8>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fneg_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fneg_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: mov.b32 %r3, 0f00000000; ; CHECK-NOF16-NEXT: sub.rn.f32 %r4, %r3, %r2; @@ -275,14 +285,16 @@ define <2 x half> @test_fmul(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<10>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fmul_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fmul_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmul_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmul_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: mul.rn.f32 %r5, %r4, %r3; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NOF16-NEXT: mul.rn.f32 %r8, %r7, %r6; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NOF16-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -299,14 +311,16 @@ define <2 x half> @test_fdiv(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fdiv_param_0]; -; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fdiv_param_1]; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NEXT: ld.param.b32 %r2, [test_fdiv_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fdiv_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NEXT: div.rn.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NEXT: div.rn.f32 %r8, %r7, %r6; ; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -331,10 +345,12 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<18>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_frem_param_0]; -; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_frem_param_1]; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NEXT: ld.param.b32 %r2, [test_frem_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_frem_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NEXT: div.rn.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rzi.f32.f32 %r6, %r5; ; CHECK-NEXT: neg.f32 %r7, %r6; @@ -342,8 +358,8 @@ define <2 x half> @test_frem(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: testp.infinite.f32 %p1, %r3; ; CHECK-NEXT: selp.f32 %r9, %r4, %r8, %p1; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r9; -; CHECK-NEXT: cvt.f32.f16 %r10, %rs3; -; CHECK-NEXT: cvt.f32.f16 %r11, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r10, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r11, %rs3; ; CHECK-NEXT: div.rn.f32 %r12, %r11, %r10; ; CHECK-NEXT: cvt.rzi.f32.f32 %r13, %r12; ; CHECK-NEXT: neg.f32 %r14, %r13; @@ -446,10 +462,10 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: st.param.b32 [param1], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -469,10 +485,10 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: st.param.b32 [param1], %r1; +; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 1 @@ -492,10 +508,10 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r1; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: st.param.b32 [param1], %r1; +; CHECK-NEXT: st.param.b32 [param0], %r2; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 2 @@ -535,11 +551,13 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; ; CHECK-F16-NEXT: ld.param.b32 %r3, [test_select_cc_param_2]; -; CHECK-F16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0]; +; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; +; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; ; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r3, %r4; -; CHECK-F16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_1]; -; CHECK-F16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2; -; CHECK-F16-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1; +; CHECK-F16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-F16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-F16-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; +; CHECK-F16-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; ; CHECK-F16-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; CHECK-F16-NEXT: ret; ; @@ -550,18 +568,22 @@ define <2 x half> @test_select_cc(<2 x half> %a, <2 x half> %b, <2 x half> %c, < ; CHECK-NOF16-NEXT: .reg .b32 %r<9>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_param_3]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_select_cc_param_2]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs5; +; CHECK-NOF16-NEXT: ld.param.b32 %r4, [test_select_cc_param_3]; +; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_select_cc_param_2]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_select_cc_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_select_cc_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r4; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r6, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs6; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs4; ; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r8, %r7; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs7, %rs8}, [test_select_cc_param_1]; -; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs2, %rs8, %p2; -; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs7, %p1; +; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r2; +; CHECK-NOF16-NEXT: mov.b32 {%rs7, %rs8}, %r1; +; CHECK-NOF16-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2; +; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs7, %rs5, %p1; ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs9}; ; CHECK-NOF16-NEXT: ret; %cc = fcmp une <2 x half> %c, %d @@ -579,11 +601,13 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, ; CHECK-F16-NEXT: // %bb.0: ; CHECK-F16-NEXT: ld.param.b32 %r2, [test_select_cc_f32_f16_param_3]; ; CHECK-F16-NEXT: ld.param.b32 %r1, [test_select_cc_f32_f16_param_2]; -; CHECK-F16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0]; +; CHECK-F16-NEXT: ld.param.b64 %rd2, [test_select_cc_f32_f16_param_1]; +; CHECK-F16-NEXT: ld.param.b64 %rd1, [test_select_cc_f32_f16_param_0]; ; CHECK-F16-NEXT: setp.neu.f16x2 %p1|%p2, %r1, %r2; -; CHECK-F16-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f32_f16_param_1]; -; CHECK-F16-NEXT: selp.f32 %r7, %r4, %r6, %p2; -; CHECK-F16-NEXT: selp.f32 %r8, %r3, %r5, %p1; +; CHECK-F16-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-F16-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-F16-NEXT: selp.f32 %r7, %r6, %r4, %p2; +; CHECK-F16-NEXT: selp.f32 %r8, %r5, %r3, %p1; ; CHECK-F16-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; ; CHECK-F16-NEXT: ret; ; @@ -595,18 +619,22 @@ define <2 x float> @test_select_cc_f32_f16(<2 x float> %a, <2 x float> %b, ; CHECK-NOF16-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f16_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_f32_f16_param_3]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_f32_f16_param_2]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r6, %r5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs2; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs4; -; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r8, %r7; -; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r9, %r10}, [test_select_cc_f32_f16_param_1]; -; CHECK-NOF16-NEXT: selp.f32 %r11, %r4, %r10, %p2; -; CHECK-NOF16-NEXT: selp.f32 %r12, %r3, %r9, %p1; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_select_cc_f32_f16_param_3]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_select_cc_f32_f16_param_2]; +; CHECK-NOF16-NEXT: ld.param.b64 %rd2, [test_select_cc_f32_f16_param_1]; +; CHECK-NOF16-NEXT: ld.param.b64 %rd1, [test_select_cc_f32_f16_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs1; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs3; +; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r4, %r3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs4; +; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r6, %r5; +; CHECK-NOF16-NEXT: mov.b64 {%r7, %r8}, %rd2; +; CHECK-NOF16-NEXT: mov.b64 {%r9, %r10}, %rd1; +; CHECK-NOF16-NEXT: selp.f32 %r11, %r10, %r8, %p2; +; CHECK-NOF16-NEXT: selp.f32 %r12, %r9, %r7, %p1; ; CHECK-NOF16-NEXT: st.param.v2.b32 [func_retval0], {%r12, %r11}; ; CHECK-NOF16-NEXT: ret; <2 x half> %c, <2 x half> %d) #0 { @@ -624,14 +652,18 @@ define <2 x half> @test_select_cc_f16_f32(<2 x half> %a, <2 x half> %b, ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_select_cc_f16_f32_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f16_f32_param_2]; -; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_f16_f32_param_3]; -; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r5; -; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r6; -; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_select_cc_f16_f32_param_1]; -; CHECK-NEXT: selp.b16 %rs5, %rs2, %rs4, %p2; -; CHECK-NEXT: selp.b16 %rs6, %rs1, %rs3, %p1; +; CHECK-NEXT: ld.param.b64 %rd2, [test_select_cc_f16_f32_param_3]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_select_cc_f16_f32_param_2]; +; CHECK-NEXT: ld.param.b32 %r2, [test_select_cc_f16_f32_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_f16_f32_param_0]; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NEXT: setp.neu.f32 %p1, %r5, %r3; +; CHECK-NEXT: setp.neu.f32 %p2, %r6, %r4; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2; +; CHECK-NEXT: selp.b16 %rs6, %rs3, %rs1, %p1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs6, %rs5}; ; CHECK-NEXT: ret; <2 x float> %c, <2 x float> %d) #0 { @@ -664,13 +696,15 @@ define <2 x i1> @test_fcmp_une(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_une_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_une_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_une_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_une_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.neu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.neu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -705,13 +739,15 @@ define <2 x i1> @test_fcmp_ueq(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ueq_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ueq_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ueq_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ueq_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.equ.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.equ.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -746,13 +782,15 @@ define <2 x i1> @test_fcmp_ugt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ugt_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ugt_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ugt_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ugt_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.gtu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.gtu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -787,13 +825,15 @@ define <2 x i1> @test_fcmp_uge(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_uge_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_uge_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_uge_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_uge_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.geu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.geu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -828,13 +868,15 @@ define <2 x i1> @test_fcmp_ult(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ult_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ult_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ult_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ult_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.ltu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.ltu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -869,13 +911,15 @@ define <2 x i1> @test_fcmp_ule(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ule_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ule_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ule_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ule_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.leu.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.leu.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -911,13 +955,15 @@ define <2 x i1> @test_fcmp_uno(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_uno_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_uno_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_uno_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_uno_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -952,13 +998,15 @@ define <2 x i1> @test_fcmp_one(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_one_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_one_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_one_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_one_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.ne.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.ne.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -993,13 +1041,15 @@ define <2 x i1> @test_fcmp_oeq(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_oeq_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_oeq_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_oeq_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_oeq_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.eq.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.eq.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1034,13 +1084,15 @@ define <2 x i1> @test_fcmp_ogt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ogt_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ogt_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ogt_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ogt_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.gt.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.gt.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1075,13 +1127,15 @@ define <2 x i1> @test_fcmp_oge(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_oge_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_oge_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_oge_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_oge_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.ge.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.ge.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1116,13 +1170,15 @@ define <2 x i1> @test_fcmp_olt(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_olt_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_olt_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_olt_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_olt_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.lt.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.lt.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1157,13 +1213,15 @@ define <2 x i1> @test_fcmp_ole(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ole_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ole_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ole_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ole_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.le.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.le.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1198,13 +1256,15 @@ define <2 x i1> @test_fcmp_ord(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fcmp_ord_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fcmp_ord_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fcmp_ord_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fcmp_ord_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NOF16-NEXT: setp.num.f32 %p1, %r4, %r3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs3; ; CHECK-NOF16-NEXT: setp.num.f32 %p2, %r6, %r5; ; CHECK-NOF16-NEXT: selp.b16 %rs5, -1, 0, %p2; ; CHECK-NOF16-NEXT: st.param.b8 [func_retval0], %rs5; @@ -1222,7 +1282,8 @@ define <2 x i32> @test_fptosi_i32(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_i32_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rzi.s32.f16 %r2, %rs2; ; CHECK-NEXT: cvt.rzi.s32.f16 %r3, %rs1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -1239,7 +1300,8 @@ define <2 x i64> @test_fptosi_i64(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptosi_i64_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fptosi_i64_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rzi.s64.f16 %rd1, %rs2; ; CHECK-NEXT: cvt.rzi.s64.f16 %rd2, %rs1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -1255,7 +1317,8 @@ define <2 x i32> @test_fptoui_2xi32(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xi32_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fptoui_2xi32_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rzi.u32.f16 %r2, %rs2; ; CHECK-NEXT: cvt.rzi.u32.f16 %r3, %rs1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -1272,7 +1335,8 @@ define <2 x i64> @test_fptoui_2xi64(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fptoui_2xi64_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fptoui_2xi64_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rzi.u64.f16 %rd1, %rs2; ; CHECK-NEXT: cvt.rzi.u64.f16 %rd2, %rs1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -1369,16 +1433,17 @@ define <2 x half> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_uitofp_2xi32_fadd_param_1]; ; CHECK-NOF16-NEXT: cvt.rn.f16.u32 %rs1, %r1; ; CHECK-NOF16-NEXT: cvt.rn.f16.u32 %rs2, %r2; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_uitofp_2xi32_fadd_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs2; -; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r4, %r5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; +; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; -; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r7, %r8; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; +; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r8, %r7; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r9; ; CHECK-NOF16-NEXT: mov.b32 %r10, {%rs6, %rs5}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r10; @@ -1411,16 +1476,17 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sitofp_2xi32_fadd_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_sitofp_2xi32_fadd_param_1]; ; CHECK-NOF16-NEXT: cvt.rn.f16.s32 %rs1, %r1; ; CHECK-NOF16-NEXT: cvt.rn.f16.s32 %rs2, %r2; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_sitofp_2xi32_fadd_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs2; -; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r4, %r5; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; +; CHECK-NOF16-NEXT: add.rn.f32 %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs5, %r6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; -; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r7, %r8; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; +; CHECK-NOF16-NEXT: add.rn.f32 %r9, %r8, %r7; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs6, %r9; ; CHECK-NOF16-NEXT: mov.b32 %r10, {%rs6, %rs5}; ; CHECK-NOF16-NEXT: st.param.b32 [func_retval0], %r10; @@ -1433,11 +1499,17 @@ define <2 x half> @test_sitofp_2xi32_fadd(<2 x i32> %a, <2 x half> %b) #0 { define <2 x half> @test_fptrunc_2xfloat(<2 x float> %a) #0 { ; CHECK-LABEL: test_fptrunc_2xfloat( ; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b64 %rd1, [test_fptrunc_2xfloat_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %rd1; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: cvt.rn.f16.f32 %rs1, %r2; +; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r1; +; CHECK-NEXT: mov.b32 %r3, {%rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; ; CHECK-NEXT: ret; %r = fptrunc <2 x float> %a to <2 x half> ret <2 x half> %r @@ -1468,7 +1540,8 @@ define <2 x float> @test_fpext_2xfloat(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fpext_2xfloat_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fpext_2xfloat_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: cvt.f32.f16 %r3, %rs1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r3, %r2}; @@ -1485,7 +1558,8 @@ define <2 x double> @test_fpext_2xdouble(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fpext_2xdouble_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_fpext_2xdouble_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.f64.f16 %rd1, %rs2; ; CHECK-NEXT: cvt.f64.f16 %rd2, %rs1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd2, %rd1}; @@ -1578,7 +1652,8 @@ define <2 x half> @test_sqrt(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sqrt_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_sqrt_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: sqrt.rn.f32 %r3, %r2; ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1606,7 +1681,8 @@ define <2 x half> @test_sin(<2 x half> %a) #0 #1 { ; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sin_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_sin_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: sin.approx.f32 %r3, %r2; ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1627,7 +1703,8 @@ define <2 x half> @test_cos(<2 x half> %a) #0 #1 { ; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_cos_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_cos_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: cos.approx.f32 %r3, %r2; ; CHECK-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1703,17 +1780,20 @@ define <2 x half> @test_fma(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<13>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fma_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fma_param_2]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_fma_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fma_param_2]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fma_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fma_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; +; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs6; ; CHECK-NOF16-NEXT: fma.rn.f32 %r7, %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %r7; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs5; ; CHECK-NOF16-NEXT: fma.rn.f32 %r11, %r10, %r9, %r8; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %r11; ; CHECK-NOF16-NEXT: mov.b32 %r12, {%rs8, %rs7}; @@ -1740,7 +1820,8 @@ define <2 x half> @test_fabs(<2 x half> %a) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<7>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fabs_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fabs_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NOF16-NEXT: abs.f32 %r3, %r2; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %r3; @@ -1761,14 +1842,16 @@ define <2 x half> @test_minnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_minnum_param_0]; -; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_minnum_param_1]; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NEXT: ld.param.b32 %r2, [test_minnum_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_minnum_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NEXT: min.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NEXT: min.f32 %r8, %r7, %r6; ; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -1785,14 +1868,16 @@ define <2 x half> @test_maxnum(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NEXT: .reg .b32 %r<10>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_maxnum_param_0]; -; CHECK-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_maxnum_param_1]; -; CHECK-NEXT: cvt.f32.f16 %r3, %rs4; -; CHECK-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NEXT: ld.param.b32 %r2, [test_maxnum_param_1]; +; CHECK-NEXT: ld.param.b32 %r1, [test_maxnum_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NEXT: cvt.f32.f16 %r3, %rs2; +; CHECK-NEXT: mov.b32 {%rs3, %rs4}, %r1; +; CHECK-NEXT: cvt.f32.f16 %r4, %rs4; ; CHECK-NEXT: max.f32 %r5, %r4, %r3; ; CHECK-NEXT: cvt.rn.f16.f32 %rs5, %r5; -; CHECK-NEXT: cvt.f32.f16 %r6, %rs3; -; CHECK-NEXT: cvt.f32.f16 %r7, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r6, %rs1; +; CHECK-NEXT: cvt.f32.f16 %r7, %rs3; ; CHECK-NEXT: max.f32 %r8, %r7, %r6; ; CHECK-NEXT: cvt.rn.f16.f32 %rs6, %r8; ; CHECK-NEXT: mov.b32 %r9, {%rs6, %rs5}; @@ -1822,13 +1907,15 @@ define <2 x half> @test_copysign(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<3>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_copysign_param_1]; -; CHECK-NOF16-NEXT: and.b16 %rs5, %rs4, -32768; -; CHECK-NOF16-NEXT: and.b16 %rs6, %rs2, 32767; -; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs5; -; CHECK-NOF16-NEXT: and.b16 %rs8, %rs3, -32768; -; CHECK-NOF16-NEXT: and.b16 %rs9, %rs1, 32767; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_copysign_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, -32768; +; CHECK-NOF16-NEXT: mov.b32 {%rs4, %rs5}, %r1; +; CHECK-NOF16-NEXT: and.b16 %rs6, %rs5, 32767; +; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs3; +; CHECK-NOF16-NEXT: and.b16 %rs8, %rs1, -32768; +; CHECK-NOF16-NEXT: and.b16 %rs9, %rs4, 32767; ; CHECK-NOF16-NEXT: or.b16 %rs10, %rs9, %rs8; ; CHECK-NOF16-NEXT: st.param.v2.b16 [func_retval0], {%rs10, %rs7}; ; CHECK-NOF16-NEXT: ret; @@ -1844,8 +1931,9 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { ; CHECK-F16-NEXT: .reg .b64 %rd<2>; ; CHECK-F16-EMPTY: ; CHECK-F16-NEXT: // %bb.0: +; CHECK-F16-NEXT: ld.param.b64 %rd1, [test_copysign_f32_param_1]; ; CHECK-F16-NEXT: ld.param.b32 %r1, [test_copysign_f32_param_0]; -; CHECK-F16-NEXT: ld.param.v2.b32 {%r2, %r3}, [test_copysign_f32_param_1]; +; CHECK-F16-NEXT: mov.b64 {%r2, %r3}, %rd1; ; CHECK-F16-NEXT: cvt.rn.f16.f32 %rs1, %r3; ; CHECK-F16-NEXT: cvt.rn.f16.f32 %rs2, %r2; ; CHECK-F16-NEXT: mov.b32 %r4, {%rs2, %rs1}; @@ -1862,8 +1950,10 @@ define <2 x half> @test_copysign_f32(<2 x half> %a, <2 x float> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b64 %rd<2>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_f32_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b32 {%r2, %r3}, [test_copysign_f32_param_1]; +; CHECK-NOF16-NEXT: ld.param.b64 %rd1, [test_copysign_f32_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_f32_param_0]; +; CHECK-NOF16-NEXT: mov.b64 {%r2, %r3}, %rd1; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, 32767; ; CHECK-NOF16-NEXT: and.b32 %r4, %r3, -2147483648; ; CHECK-NOF16-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs4}, %r4; } @@ -1906,7 +1996,8 @@ define <2 x half> @test_copysign_f64(<2 x half> %a, <2 x double> %b) #0 { ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: ; CHECK-NOF16-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_copysign_f64_param_1]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_f64_param_0]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_f64_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NOF16-NEXT: and.b16 %rs3, %rs2, 32767; ; CHECK-NOF16-NEXT: and.b64 %rd3, %rd2, -9223372036854775808; ; CHECK-NOF16-NEXT: shr.u64 %rd4, %rd3, 48; @@ -1948,13 +2039,15 @@ define <2 x float> @test_copysign_extended(<2 x half> %a, <2 x half> %b) #0 { ; CHECK-NOF16-NEXT: .reg .b32 %r<5>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_copysign_extended_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_copysign_extended_param_1]; -; CHECK-NOF16-NEXT: and.b16 %rs5, %rs3, -32768; -; CHECK-NOF16-NEXT: and.b16 %rs6, %rs1, 32767; -; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs5; -; CHECK-NOF16-NEXT: and.b16 %rs8, %rs4, -32768; -; CHECK-NOF16-NEXT: and.b16 %rs9, %rs2, 32767; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_copysign_extended_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_copysign_extended_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r2; +; CHECK-NOF16-NEXT: and.b16 %rs3, %rs1, -32768; +; CHECK-NOF16-NEXT: mov.b32 {%rs4, %rs5}, %r1; +; CHECK-NOF16-NEXT: and.b16 %rs6, %rs4, 32767; +; CHECK-NOF16-NEXT: or.b16 %rs7, %rs6, %rs3; +; CHECK-NOF16-NEXT: and.b16 %rs8, %rs2, -32768; +; CHECK-NOF16-NEXT: and.b16 %rs9, %rs5, 32767; ; CHECK-NOF16-NEXT: or.b16 %rs10, %rs9, %rs8; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs10; ; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs7; @@ -1972,7 +2065,8 @@ define <2 x half> @test_floor(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_floor_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_floor_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rmi.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rmi.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -1988,7 +2082,8 @@ define <2 x half> @test_ceil(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_ceil_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_ceil_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rpi.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rpi.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2004,7 +2099,8 @@ define <2 x half> @test_trunc(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_trunc_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_trunc_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rzi.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rzi.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2020,7 +2116,8 @@ define <2 x half> @test_rint(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_rint_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_rint_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2036,7 +2133,8 @@ define <2 x half> @test_nearbyint(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_nearbyint_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_nearbyint_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2052,7 +2150,8 @@ define <2 x half> @test_roundeven(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_roundeven_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_roundeven_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rni.f16.f16 %rs3, %rs2; ; CHECK-NEXT: cvt.rni.f16.f16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2070,7 +2169,8 @@ define <2 x half> @test_round(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<21>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_round_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_round_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.f32.f16 %r2, %rs2; ; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; ; CHECK-NEXT: or.b32 %r4, %r3, 1056964608; @@ -2121,17 +2221,20 @@ define <2 x half> @test_fmuladd(<2 x half> %a, <2 x half> %b, <2 x half> %c) #0 ; CHECK-NOF16-NEXT: .reg .b32 %r<13>; ; CHECK-NOF16-EMPTY: ; CHECK-NOF16-NEXT: // %bb.0: -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_fmuladd_param_0]; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs3, %rs4}, [test_fmuladd_param_2]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs4; -; CHECK-NOF16-NEXT: ld.param.v2.b16 {%rs5, %rs6}, [test_fmuladd_param_1]; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs6; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs2; +; CHECK-NOF16-NEXT: ld.param.b32 %r3, [test_fmuladd_param_2]; +; CHECK-NOF16-NEXT: ld.param.b32 %r2, [test_fmuladd_param_1]; +; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fmuladd_param_0]; +; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r4, %rs2; +; CHECK-NOF16-NEXT: mov.b32 {%rs3, %rs4}, %r2; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r5, %rs4; +; CHECK-NOF16-NEXT: mov.b32 {%rs5, %rs6}, %r1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs6; ; CHECK-NOF16-NEXT: fma.rn.f32 %r7, %r6, %r5, %r4; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs7, %r7; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs3; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs5; -; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r8, %rs1; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r9, %rs3; +; CHECK-NOF16-NEXT: cvt.f32.f16 %r10, %rs5; ; CHECK-NOF16-NEXT: fma.rn.f32 %r11, %r10, %r9, %r8; ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs8, %r11; ; CHECK-NOF16-NEXT: mov.b32 %r12, {%rs8, %rs7}; @@ -2148,7 +2251,8 @@ define <2 x half> @test_shufflevector(<2 x half> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_shufflevector_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_shufflevector_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; ; CHECK-NEXT: ret; %s = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0> @@ -2158,12 +2262,13 @@ define <2 x half> @test_shufflevector(<2 x half> %a) #0 { define <2 x half> @test_insertelement(<2 x half> %a, half %x) #0 { ; CHECK-LABEL: test_insertelement( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b16 %rs<3>; ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [test_insertelement_param_1]; -; CHECK-NEXT: ld.param.v2.b16 {%rs2, %rs3}, [test_insertelement_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_insertelement_param_0]; +; CHECK-NEXT: { .reg .b16 tmp; mov.b32 {%rs2, tmp}, %r1; } ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs2, %rs1}; ; CHECK-NEXT: ret; %i = insertelement <2 x half> %a, half %x, i64 1 @@ -2177,7 +2282,8 @@ define <2 x half> @test_sitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_sitofp_2xi16_to_2xhalf_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_sitofp_2xi16_to_2xhalf_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rn.f16.s16 %rs3, %rs2; ; CHECK-NEXT: cvt.rn.f16.s16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; @@ -2193,7 +2299,8 @@ define <2 x half> @test_uitofp_2xi16_to_2xhalf(<2 x i16> %a) #0 { ; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b16 {%rs1, %rs2}, [test_uitofp_2xi16_to_2xhalf_param_0]; +; CHECK-NEXT: ld.param.b32 %r1, [test_uitofp_2xi16_to_2xhalf_param_0]; +; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1; ; CHECK-NEXT: cvt.rn.f16.u16 %rs3, %rs2; ; CHECK-NEXT: cvt.rn.f16.u16 %rs4, %rs1; ; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3}; |