diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/f32x2-instructions.ll')
-rw-r--r-- | llvm/test/CodeGen/NVPTX/f32x2-instructions.ll | 680 |
1 files changed, 434 insertions, 246 deletions
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll index af3cb63..30afd69 100644 --- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll @@ -28,29 +28,53 @@ define <2 x float> @test_ret_const() #0 { } define float @test_extract_0(<2 x float> %a) #0 { -; CHECK-LABEL: test_extract_0( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-NEXT: .reg .b64 %rd<2>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_0_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %r1; -; CHECK-NEXT: ret; +; CHECK-NOF32X2-LABEL: test_extract_0( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<2>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0]; +; CHECK-NOF32X2-NEXT: { .reg .b32 tmp; mov.b64 {%r1, tmp}, %rd1; } +; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_extract_0( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<2>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_extract_0_param_0]; +; CHECK-F32X2-NEXT: mov.b64 {%r1, _}, %rd1; +; CHECK-F32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-F32X2-NEXT: ret; %e = extractelement <2 x float> %a, i32 0 ret float %e } define float @test_extract_1(<2 x float> %a) #0 { -; CHECK-LABEL: test_extract_1( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-NEXT: .reg .b64 %rd<2>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_1_param_0]; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; -; CHECK-NEXT: ret; +; CHECK-NOF32X2-LABEL: test_extract_1( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<2>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0]; +; CHECK-NOF32X2-NEXT: { .reg .b32 tmp; mov.b64 {tmp, %r1}, %rd1; } +; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_extract_1( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<2>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_extract_1_param_0]; +; CHECK-F32X2-NEXT: mov.b64 {_, %r1}, %rd1; +; CHECK-F32X2-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-F32X2-NEXT: ret; %e = extractelement <2 x float> %a, i32 1 ret float %e } @@ -70,10 +94,12 @@ define <2 x float> @test_fadd(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fadd_param_1]; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r2, %r4; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fadd_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fadd_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -98,7 +124,8 @@ define <2 x float> @test_fadd_imm_0(<2 x float> %a) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_0_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_0_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40000000; ; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -128,7 +155,8 @@ define <2 x float> @test_fadd_imm_1(<2 x float> %a) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_1_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_1_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40000000; ; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f3F800000; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -158,13 +186,17 @@ define <4 x float> @test_fadd_v4(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<5>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_v4_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_fadd_v4_param_1]; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r9, %r4, %r8; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r10, %r3, %r7; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r11, %r2, %r6; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r12, %r1, %r5; -; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_fadd_v4_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_v4_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd4; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, %r1; +; CHECK-NOF32X2-NEXT: mov.b64 {%r7, %r8}, %rd3; +; CHECK-NOF32X2-NEXT: mov.b64 {%r9, %r10}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r11, %r10, %r8; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r12, %r9, %r7; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; ; CHECK-F32X2-LABEL: test_fadd_v4( @@ -189,12 +221,14 @@ define <4 x float> @test_fadd_imm_0_v4(<4 x float> %a) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_0_v4_param_0]; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, 0f40800000; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, 0f40400000; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r2, 0f40000000; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; -; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_0_v4_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f40400000; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r6, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r5, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r4, %r3}; ; CHECK-NOF32X2-NEXT: ret; ; ; CHECK-F32X2-LABEL: test_fadd_imm_0_v4( @@ -225,12 +259,14 @@ define <4 x float> @test_fadd_imm_1_v4(<4 x float> %a) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_1_v4_param_0]; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r5, %r4, 0f40800000; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r6, %r3, 0f40400000; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r2, 0f40000000; -; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r1, 0f3F800000; -; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_1_v4_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r3, %r2, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r4, %r1, 0f40400000; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r6, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r5, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r4, %r3}; ; CHECK-NOF32X2-NEXT: ret; ; ; CHECK-F32X2-LABEL: test_fadd_imm_1_v4( @@ -261,10 +297,12 @@ define <2 x float> @test_fsub(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fsub_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fsub_param_1]; -; CHECK-NOF32X2-NEXT: sub.rn.f32 %r5, %r2, %r4; -; CHECK-NOF32X2-NEXT: sub.rn.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fsub_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fsub_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NOF32X2-NEXT: sub.rn.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: sub.rn.f32 %r6, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -289,7 +327,8 @@ define <2 x float> @test_fneg(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fneg_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fneg_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: neg.f32 %r3, %r2; ; CHECK-NEXT: neg.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -305,10 +344,12 @@ define <2 x float> @test_fmul(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmul_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmul_param_1]; -; CHECK-NOF32X2-NEXT: mul.rn.f32 %r5, %r2, %r4; -; CHECK-NOF32X2-NEXT: mul.rn.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fmul_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fmul_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NOF32X2-NEXT: mul.rn.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: mul.rn.f32 %r6, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -333,10 +374,12 @@ define <2 x float> @test_fdiv(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fdiv_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fdiv_param_1]; -; CHECK-NEXT: div.rn.f32 %r5, %r2, %r4; -; CHECK-NEXT: div.rn.f32 %r6, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fdiv_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fdiv_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: div.rn.f32 %r5, %r4, %r2; +; CHECK-NEXT: div.rn.f32 %r6, %r3, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NEXT: ret; %r = fdiv <2 x float> %a, %b @@ -351,20 +394,22 @@ define <2 x float> @test_frem(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_frem_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_frem_param_1]; -; CHECK-NEXT: div.rn.f32 %r5, %r2, %r4; +; CHECK-NEXT: ld.param.b64 %rd2, [test_frem_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_frem_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: div.rn.f32 %r5, %r4, %r2; ; CHECK-NEXT: cvt.rzi.f32.f32 %r6, %r5; ; CHECK-NEXT: neg.f32 %r7, %r6; -; CHECK-NEXT: fma.rn.f32 %r8, %r7, %r4, %r2; -; CHECK-NEXT: testp.infinite.f32 %p1, %r4; -; CHECK-NEXT: selp.f32 %r9, %r2, %r8, %p1; -; CHECK-NEXT: div.rn.f32 %r10, %r1, %r3; +; CHECK-NEXT: fma.rn.f32 %r8, %r7, %r2, %r4; +; CHECK-NEXT: testp.infinite.f32 %p1, %r2; +; CHECK-NEXT: selp.f32 %r9, %r4, %r8, %p1; +; CHECK-NEXT: div.rn.f32 %r10, %r3, %r1; ; CHECK-NEXT: cvt.rzi.f32.f32 %r11, %r10; ; CHECK-NEXT: neg.f32 %r12, %r11; -; CHECK-NEXT: fma.rn.f32 %r13, %r12, %r3, %r1; -; CHECK-NEXT: testp.infinite.f32 %p2, %r3; -; CHECK-NEXT: selp.f32 %r14, %r1, %r13, %p2; +; CHECK-NEXT: fma.rn.f32 %r13, %r12, %r1, %r3; +; CHECK-NEXT: testp.infinite.f32 %p2, %r1; +; CHECK-NEXT: selp.f32 %r14, %r3, %r13, %p2; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9}; ; CHECK-NEXT: ret; %r = frem <2 x float> %a, %b @@ -378,10 +423,12 @@ define <2 x float> @test_fadd_ftz(<2 x float> %a, <2 x float> %b) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_ftz_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fadd_ftz_param_1]; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r2, %r4; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fadd_ftz_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fadd_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -406,7 +453,8 @@ define <2 x float> @test_fadd_imm_0_ftz(<2 x float> %a) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_0_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_0_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40000000; ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f3F800000; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -436,7 +484,8 @@ define <2 x float> @test_fadd_imm_1_ftz(<2 x float> %a) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fadd_imm_1_ftz_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fadd_imm_1_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40000000; ; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f3F800000; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -466,13 +515,17 @@ define <4 x float> @test_fadd_v4_ftz(<4 x float> %a, <4 x float> %b) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<5>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_v4_ftz_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r5, %r6, %r7, %r8}, [test_fadd_v4_ftz_param_1]; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r9, %r4, %r8; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r10, %r3, %r7; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r11, %r2, %r6; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r12, %r1, %r5; -; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r10, %r9}; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_fadd_v4_ftz_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_v4_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd4; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, %r1; +; CHECK-NOF32X2-NEXT: mov.b64 {%r7, %r8}, %rd3; +; CHECK-NOF32X2-NEXT: mov.b64 {%r9, %r10}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r11, %r10, %r8; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r12, %r9, %r7; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r12, %r11, %r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; ; CHECK-F32X2-LABEL: test_fadd_v4_ftz( @@ -497,12 +550,14 @@ define <4 x float> @test_fadd_imm_0_v4_ftz(<4 x float> %a) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_0_v4_ftz_param_0]; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, 0f40800000; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, 0f40400000; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r2, 0f40000000; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r1, 0f3F800000; -; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_0_v4_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f40400000; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r6, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r5, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r4, %r3}; ; CHECK-NOF32X2-NEXT: ret; ; ; CHECK-F32X2-LABEL: test_fadd_imm_0_v4_ftz( @@ -533,12 +588,14 @@ define <4 x float> @test_fadd_imm_1_v4_ftz(<4 x float> %a) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v4.b32 {%r1, %r2, %r3, %r4}, [test_fadd_imm_1_v4_ftz_param_0]; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r5, %r4, 0f40800000; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r6, %r3, 0f40400000; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r2, 0f40000000; -; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r1, 0f3F800000; -; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r6, %r5}; +; CHECK-NOF32X2-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_fadd_imm_1_v4_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r3, %r2, 0f40800000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r4, %r1, 0f40400000; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r7, %r6, 0f40000000; +; CHECK-NOF32X2-NEXT: add.rn.ftz.f32 %r8, %r5, 0f3F800000; +; CHECK-NOF32X2-NEXT: st.param.v4.b32 [func_retval0], {%r8, %r7, %r4, %r3}; ; CHECK-NOF32X2-NEXT: ret; ; ; CHECK-F32X2-LABEL: test_fadd_imm_1_v4_ftz( @@ -569,10 +626,12 @@ define <2 x float> @test_fsub_ftz(<2 x float> %a, <2 x float> %b) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fsub_ftz_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fsub_ftz_param_1]; -; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r5, %r2, %r4; -; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fsub_ftz_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fsub_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: sub.rn.ftz.f32 %r6, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -597,7 +656,8 @@ define <2 x float> @test_fneg_ftz(<2 x float> %a) #2 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fneg_ftz_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fneg_ftz_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: neg.ftz.f32 %r3, %r2; ; CHECK-NEXT: neg.ftz.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -613,10 +673,12 @@ define <2 x float> @test_fmul_ftz(<2 x float> %a, <2 x float> %b) #2 { ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<3>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmul_ftz_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmul_ftz_param_1]; -; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r5, %r2, %r4; -; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fmul_ftz_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fmul_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r5, %r4, %r2; +; CHECK-NOF32X2-NEXT: mul.rn.ftz.f32 %r6, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -641,11 +703,14 @@ define <2 x float> @test_fma_ftz(<2 x float> %a, <2 x float> %b, <2 x float> %c) ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<4>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fma_ftz_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fma_ftz_param_1]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fma_ftz_param_2]; -; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r7, %r2, %r4, %r6; -; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r8, %r1, %r3, %r5; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd3, [test_fma_ftz_param_2]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fma_ftz_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fma_ftz_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd3; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r7, %r6, %r4, %r2; +; CHECK-NOF32X2-NEXT: fma.rn.ftz.f32 %r8, %r5, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -671,10 +736,12 @@ define <2 x float> @test_fdiv_ftz(<2 x float> %a, <2 x float> %b) #2 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fdiv_ftz_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fdiv_ftz_param_1]; -; CHECK-NEXT: div.rn.ftz.f32 %r5, %r2, %r4; -; CHECK-NEXT: div.rn.ftz.f32 %r6, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fdiv_ftz_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fdiv_ftz_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: div.rn.ftz.f32 %r5, %r4, %r2; +; CHECK-NEXT: div.rn.ftz.f32 %r6, %r3, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NEXT: ret; %r = fdiv <2 x float> %a, %b @@ -689,20 +756,22 @@ define <2 x float> @test_frem_ftz(<2 x float> %a, <2 x float> %b) #2 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_frem_ftz_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_frem_ftz_param_1]; -; CHECK-NEXT: div.rn.ftz.f32 %r5, %r2, %r4; +; CHECK-NEXT: ld.param.b64 %rd2, [test_frem_ftz_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_frem_ftz_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: div.rn.ftz.f32 %r5, %r4, %r2; ; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %r6, %r5; ; CHECK-NEXT: neg.ftz.f32 %r7, %r6; -; CHECK-NEXT: fma.rn.ftz.f32 %r8, %r7, %r4, %r2; -; CHECK-NEXT: testp.infinite.f32 %p1, %r4; -; CHECK-NEXT: selp.f32 %r9, %r2, %r8, %p1; -; CHECK-NEXT: div.rn.ftz.f32 %r10, %r1, %r3; +; CHECK-NEXT: fma.rn.ftz.f32 %r8, %r7, %r2, %r4; +; CHECK-NEXT: testp.infinite.f32 %p1, %r2; +; CHECK-NEXT: selp.f32 %r9, %r4, %r8, %p1; +; CHECK-NEXT: div.rn.ftz.f32 %r10, %r3, %r1; ; CHECK-NEXT: cvt.rzi.ftz.f32.f32 %r11, %r10; ; CHECK-NEXT: neg.ftz.f32 %r12, %r11; -; CHECK-NEXT: fma.rn.ftz.f32 %r13, %r12, %r3, %r1; -; CHECK-NEXT: testp.infinite.f32 %p2, %r3; -; CHECK-NEXT: selp.f32 %r14, %r1, %r13, %p2; +; CHECK-NEXT: fma.rn.ftz.f32 %r13, %r12, %r1, %r3; +; CHECK-NEXT: testp.infinite.f32 %p2, %r1; +; CHECK-NEXT: selp.f32 %r14, %r3, %r13, %p2; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r14, %r9}; ; CHECK-NEXT: ret; %r = frem <2 x float> %a, %b @@ -877,14 +946,18 @@ define <2 x float> @test_select_cc(<2 x float> %a, <2 x float> %b, <2 x float> % ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_param_2]; -; CHECK-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_select_cc_param_3]; -; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r5; -; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r6; -; CHECK-NEXT: ld.param.v2.b32 {%r7, %r8}, [test_select_cc_param_1]; -; CHECK-NEXT: selp.f32 %r9, %r2, %r8, %p2; -; CHECK-NEXT: selp.f32 %r10, %r1, %r7, %p1; +; CHECK-NEXT: ld.param.b64 %rd4, [test_select_cc_param_3]; +; CHECK-NEXT: ld.param.b64 %rd3, [test_select_cc_param_2]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_select_cc_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_select_cc_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd4; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd3; +; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r1; +; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r2; +; CHECK-NEXT: mov.b64 {%r5, %r6}, %rd2; +; CHECK-NEXT: mov.b64 {%r7, %r8}, %rd1; +; CHECK-NEXT: selp.f32 %r9, %r8, %r6, %p2; +; CHECK-NEXT: selp.f32 %r10, %r7, %r5, %p1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r10, %r9}; ; CHECK-NEXT: ret; %cc = fcmp une <2 x float> %c, %d @@ -902,10 +975,12 @@ define <2 x double> @test_select_cc_f64_f32(<2 x double> %a, <2 x double> %b, <2 ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f64_f32_param_1]; ; CHECK-NEXT: ld.param.v2.b64 {%rd1, %rd2}, [test_select_cc_f64_f32_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f64_f32_param_2]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f64_f32_param_3]; -; CHECK-NEXT: setp.neu.f32 %p1, %r1, %r3; -; CHECK-NEXT: setp.neu.f32 %p2, %r2, %r4; +; CHECK-NEXT: ld.param.b64 %rd6, [test_select_cc_f64_f32_param_3]; +; CHECK-NEXT: ld.param.b64 %rd5, [test_select_cc_f64_f32_param_2]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd6; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd5; +; CHECK-NEXT: setp.neu.f32 %p1, %r3, %r1; +; CHECK-NEXT: setp.neu.f32 %p2, %r4, %r2; ; CHECK-NEXT: selp.f64 %rd7, %rd2, %rd4, %p2; ; CHECK-NEXT: selp.f64 %rd8, %rd1, %rd3, %p1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd8, %rd7}; @@ -925,12 +1000,14 @@ define <2 x float> @test_select_cc_f32_f64(<2 x float> %a, <2 x float> %b, <2 x ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [test_select_cc_f32_f64_param_3]; ; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [test_select_cc_f32_f64_param_2]; -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_select_cc_f32_f64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_select_cc_f32_f64_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_select_cc_f32_f64_param_0]; ; CHECK-NEXT: setp.neu.f64 %p1, %rd3, %rd5; ; CHECK-NEXT: setp.neu.f64 %p2, %rd4, %rd6; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_f32_f64_param_1]; -; CHECK-NEXT: selp.f32 %r5, %r2, %r4, %p2; -; CHECK-NEXT: selp.f32 %r6, %r1, %r3, %p1; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: selp.f32 %r5, %r4, %r2, %p2; +; CHECK-NEXT: selp.f32 %r6, %r3, %r1, %p1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NEXT: ret; %cc = fcmp une <2 x double> %c, %d @@ -947,10 +1024,12 @@ define <2 x i1> @test_fcmp_une(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_une_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_une_param_1]; -; CHECK-NEXT: setp.neu.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.neu.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_une_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_une_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.neu.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.neu.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -969,10 +1048,12 @@ define <2 x i1> @test_fcmp_ueq(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ueq_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ueq_param_1]; -; CHECK-NEXT: setp.equ.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.equ.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ueq_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ueq_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.equ.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.equ.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -991,10 +1072,12 @@ define <2 x i1> @test_fcmp_ugt(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ugt_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ugt_param_1]; -; CHECK-NEXT: setp.gtu.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.gtu.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ugt_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ugt_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.gtu.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.gtu.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1013,10 +1096,12 @@ define <2 x i1> @test_fcmp_uge(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_uge_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_uge_param_1]; -; CHECK-NEXT: setp.geu.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.geu.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_uge_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_uge_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.geu.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.geu.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1035,10 +1120,12 @@ define <2 x i1> @test_fcmp_ult(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ult_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ult_param_1]; -; CHECK-NEXT: setp.ltu.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.ltu.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ult_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ult_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.ltu.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.ltu.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1057,10 +1144,12 @@ define <2 x i1> @test_fcmp_ule(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ule_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ule_param_1]; -; CHECK-NEXT: setp.leu.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.leu.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ule_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ule_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.leu.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.leu.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1079,10 +1168,12 @@ define <2 x i1> @test_fcmp_uno(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_uno_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_uno_param_1]; -; CHECK-NEXT: setp.nan.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.nan.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_uno_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_uno_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.nan.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.nan.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1101,10 +1192,12 @@ define <2 x i1> @test_fcmp_one(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_one_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_one_param_1]; -; CHECK-NEXT: setp.ne.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.ne.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_one_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_one_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.ne.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.ne.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1123,10 +1216,12 @@ define <2 x i1> @test_fcmp_oeq(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_oeq_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_oeq_param_1]; -; CHECK-NEXT: setp.eq.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.eq.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_oeq_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_oeq_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.eq.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.eq.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1145,10 +1240,12 @@ define <2 x i1> @test_fcmp_ogt(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ogt_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ogt_param_1]; -; CHECK-NEXT: setp.gt.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.gt.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ogt_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ogt_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.gt.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.gt.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1167,10 +1264,12 @@ define <2 x i1> @test_fcmp_oge(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_oge_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_oge_param_1]; -; CHECK-NEXT: setp.ge.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.ge.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_oge_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_oge_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.ge.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.ge.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1189,10 +1288,12 @@ define <2 x i1> @test_fcmp_olt(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_olt_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_olt_param_1]; -; CHECK-NEXT: setp.lt.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.lt.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_olt_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_olt_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.lt.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.lt.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1211,10 +1312,12 @@ define <2 x i1> @test_fcmp_ole(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ole_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ole_param_1]; -; CHECK-NEXT: setp.le.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.le.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ole_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ole_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.le.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.le.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1233,10 +1336,12 @@ define <2 x i1> @test_fcmp_ord(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fcmp_ord_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fcmp_ord_param_1]; -; CHECK-NEXT: setp.num.f32 %p1, %r2, %r4; -; CHECK-NEXT: setp.num.f32 %p2, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_fcmp_ord_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fcmp_ord_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: setp.num.f32 %p1, %r4, %r2; +; CHECK-NEXT: setp.num.f32 %p2, %r3, %r1; ; CHECK-NEXT: selp.b16 %rs1, -1, 0, %p2; ; CHECK-NEXT: st.param.b8 [func_retval0], %rs1; ; CHECK-NEXT: selp.b16 %rs2, -1, 0, %p1; @@ -1253,7 +1358,8 @@ define <2 x i32> @test_fptosi_i32(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_i32_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fptosi_i32_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rzi.s32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rzi.s32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1269,7 +1375,8 @@ define <2 x i64> @test_fptosi_i64(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptosi_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fptosi_i64_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rzi.s64.f32 %rd2, %r2; ; CHECK-NEXT: cvt.rzi.s64.f32 %rd3, %r1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; @@ -1285,7 +1392,8 @@ define <2 x i32> @test_fptoui_2xi32(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_2xi32_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fptoui_2xi32_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rzi.u32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rzi.u32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1301,7 +1409,8 @@ define <2 x i64> @test_fptoui_2xi64(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fptoui_2xi64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fptoui_2xi64_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rzi.u64.f32 %rd2, %r2; ; CHECK-NEXT: cvt.rzi.u64.f32 %rd3, %r1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; @@ -1380,9 +1489,10 @@ define <2 x float> @test_uitofp_2xi32_fadd(<2 x i32> %a, <2 x float> %b) #0 { ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: ; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_uitofp_2xi32_fadd_param_0]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_uitofp_2xi32_fadd_param_1]; ; CHECK-NOF32X2-NEXT: cvt.rn.f32.u32 %r3, %r1; ; CHECK-NOF32X2-NEXT: cvt.rn.f32.u32 %r4, %r2; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_uitofp_2xi32_fadd_param_1]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; ; CHECK-NOF32X2-NEXT: add.rn.f32 %r7, %r6, %r4; ; CHECK-NOF32X2-NEXT: add.rn.f32 %r8, %r5, %r3; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; @@ -1431,7 +1541,8 @@ define <2 x double> @test_fpext_2xdouble(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fpext_2xdouble_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fpext_2xdouble_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.f64.f32 %rd2, %r2; ; CHECK-NEXT: cvt.f64.f32 %rd3, %r1; ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd3, %rd2}; @@ -1499,7 +1610,8 @@ define <2 x float> @test_sqrt(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sqrt_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_sqrt_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: sqrt.rn.f32 %r3, %r2; ; CHECK-NEXT: sqrt.rn.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1522,7 +1634,8 @@ define <2 x float> @test_sin(<2 x float> %a) #0 #1 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_sin_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_sin_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: sin.approx.f32 %r3, %r2; ; CHECK-NEXT: sin.approx.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1538,7 +1651,8 @@ define <2 x float> @test_cos(<2 x float> %a) #0 #1 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_cos_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_cos_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cos.approx.f32 %r3, %r2; ; CHECK-NEXT: cos.approx.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1597,11 +1711,14 @@ define <2 x float> @test_fma(<2 x float> %a, <2 x float> %b, <2 x float> %c) #0 ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<4>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fma_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fma_param_1]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fma_param_2]; -; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r2, %r4, %r6; -; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r1, %r3, %r5; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd3, [test_fma_param_2]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fma_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fma_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd3; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r6, %r4, %r2; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r5, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -1627,7 +1744,8 @@ define <2 x float> @test_fabs(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fabs_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_fabs_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: abs.f32 %r3, %r2; ; CHECK-NEXT: abs.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1643,10 +1761,12 @@ define <2 x float> @test_minnum(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_minnum_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_minnum_param_1]; -; CHECK-NEXT: min.f32 %r5, %r2, %r4; -; CHECK-NEXT: min.f32 %r6, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_minnum_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_minnum_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: min.f32 %r5, %r4, %r2; +; CHECK-NEXT: min.f32 %r6, %r3, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NEXT: ret; %r = call <2 x float> @llvm.minnum(<2 x float> %a, <2 x float> %b) @@ -1660,10 +1780,12 @@ define <2 x float> @test_maxnum(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_maxnum_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_maxnum_param_1]; -; CHECK-NEXT: max.f32 %r5, %r2, %r4; -; CHECK-NEXT: max.f32 %r6, %r1, %r3; +; CHECK-NEXT: ld.param.b64 %rd2, [test_maxnum_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_maxnum_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd2; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd1; +; CHECK-NEXT: max.f32 %r5, %r4, %r2; +; CHECK-NEXT: max.f32 %r6, %r3, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; ; CHECK-NEXT: ret; %r = call <2 x float> @llvm.maxnum(<2 x float> %a, <2 x float> %b) @@ -1677,8 +1799,10 @@ define <2 x float> @test_copysign(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_copysign_param_1]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_copysign_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_copysign_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2; ; CHECK-NEXT: copysign.f32 %r5, %r4, %r2; ; CHECK-NEXT: copysign.f32 %r6, %r3, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r6, %r5}; @@ -1696,18 +1820,19 @@ define <2 x float> @test_copysign_f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.v2.b64 {%rd2, %rd3}, [test_copysign_f64_param_1]; -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_f64_param_0]; -; CHECK-NEXT: abs.f32 %r3, %r2; -; CHECK-NEXT: neg.f32 %r4, %r3; +; CHECK-NEXT: ld.param.b64 %rd1, [test_copysign_f64_param_0]; ; CHECK-NEXT: shr.u64 %rd4, %rd3, 63; ; CHECK-NEXT: and.b64 %rd5, %rd4, 1; ; CHECK-NEXT: setp.ne.b64 %p1, %rd5, 0; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: abs.f32 %r3, %r2; +; CHECK-NEXT: neg.f32 %r4, %r3; ; CHECK-NEXT: selp.f32 %r5, %r4, %r3, %p1; -; CHECK-NEXT: abs.f32 %r6, %r1; -; CHECK-NEXT: neg.f32 %r7, %r6; ; CHECK-NEXT: shr.u64 %rd6, %rd2, 63; ; CHECK-NEXT: and.b64 %rd7, %rd6, 1; ; CHECK-NEXT: setp.ne.b64 %p2, %rd7, 0; +; CHECK-NEXT: abs.f32 %r6, %r1; +; CHECK-NEXT: neg.f32 %r7, %r6; ; CHECK-NEXT: selp.f32 %r8, %r7, %r6, %p2; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r5}; ; CHECK-NEXT: ret; @@ -1723,8 +1848,10 @@ define <2 x double> @test_copysign_extended(<2 x float> %a, <2 x float> %b) #0 { ; CHECK-NEXT: .reg .b64 %rd<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_copysign_extended_param_0]; -; CHECK-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_copysign_extended_param_1]; +; CHECK-NEXT: ld.param.b64 %rd2, [test_copysign_extended_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_copysign_extended_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: mov.b64 {%r3, %r4}, %rd2; ; CHECK-NEXT: copysign.f32 %r5, %r3, %r1; ; CHECK-NEXT: copysign.f32 %r6, %r4, %r2; ; CHECK-NEXT: cvt.f64.f32 %rd3, %r6; @@ -1743,7 +1870,8 @@ define <2 x float> @test_floor(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_floor_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_floor_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rmi.f32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rmi.f32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1759,7 +1887,8 @@ define <2 x float> @test_ceil(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_ceil_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_ceil_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rpi.f32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rpi.f32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1775,7 +1904,8 @@ define <2 x float> @test_trunc(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_trunc_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rzi.f32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rzi.f32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1791,7 +1921,8 @@ define <2 x float> @test_rint(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_rint_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_rint_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rni.f32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rni.f32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1807,7 +1938,8 @@ define <2 x float> @test_nearbyint(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_nearbyint_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_nearbyint_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rni.f32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rni.f32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1823,7 +1955,8 @@ define <2 x float> @test_roundeven(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_roundeven_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_roundeven_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: cvt.rni.f32.f32 %r3, %r2; ; CHECK-NEXT: cvt.rni.f32.f32 %r4, %r1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r4, %r3}; @@ -1841,7 +1974,8 @@ define <2 x float> @test_round(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_round_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_round_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: and.b32 %r3, %r2, -2147483648; ; CHECK-NEXT: or.b32 %r4, %r3, 1056964608; ; CHECK-NEXT: add.rn.f32 %r5, %r2, %r4; @@ -1875,11 +2009,14 @@ define <2 x float> @test_fmuladd(<2 x float> %a, <2 x float> %b, <2 x float> %c) ; CHECK-NOF32X2-NEXT: .reg .b64 %rd<4>; ; CHECK-NOF32X2-EMPTY: ; CHECK-NOF32X2-NEXT: // %bb.0: -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_fmuladd_param_0]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_fmuladd_param_1]; -; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r5, %r6}, [test_fmuladd_param_2]; -; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r2, %r4, %r6; -; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r1, %r3, %r5; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd3, [test_fmuladd_param_2]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd2, [test_fmuladd_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_fmuladd_param_0]; +; CHECK-NOF32X2-NEXT: mov.b64 {%r1, %r2}, %rd3; +; CHECK-NOF32X2-NEXT: mov.b64 {%r3, %r4}, %rd2; +; CHECK-NOF32X2-NEXT: mov.b64 {%r5, %r6}, %rd1; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r7, %r6, %r4, %r2; +; CHECK-NOF32X2-NEXT: fma.rn.f32 %r8, %r5, %r3, %r1; ; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7}; ; CHECK-NOF32X2-NEXT: ret; ; @@ -1905,7 +2042,8 @@ define <2 x float> @test_shufflevector(<2 x float> %a) #0 { ; CHECK-NEXT: .reg .b64 %rd<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_shufflevector_param_0]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_shufflevector_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; ; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; ; CHECK-NEXT: ret; %s = shufflevector <2 x float> %a, <2 x float> poison, <2 x i32> <i32 1, i32 0> @@ -1913,16 +2051,29 @@ define <2 x float> @test_shufflevector(<2 x float> %a) #0 { } define <2 x float> @test_insertelement(<2 x float> %a, float %x) #0 { -; CHECK-LABEL: test_insertelement( -; CHECK: { -; CHECK-NEXT: .reg .b32 %r<4>; -; CHECK-NEXT: .reg .b64 %rd<2>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b32 %r1, [test_insertelement_param_1]; -; CHECK-NEXT: ld.param.v2.b32 {%r2, %r3}, [test_insertelement_param_0]; -; CHECK-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; -; CHECK-NEXT: ret; +; CHECK-NOF32X2-LABEL: test_insertelement( +; CHECK-NOF32X2: { +; CHECK-NOF32X2-NEXT: .reg .b32 %r<3>; +; CHECK-NOF32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-NOF32X2-EMPTY: +; CHECK-NOF32X2-NEXT: // %bb.0: +; CHECK-NOF32X2-NEXT: ld.param.b32 %r1, [test_insertelement_param_1]; +; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_insertelement_param_0]; +; CHECK-NOF32X2-NEXT: { .reg .b32 tmp; mov.b64 {%r2, tmp}, %rd1; } +; CHECK-NOF32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-NOF32X2-NEXT: ret; +; +; CHECK-F32X2-LABEL: test_insertelement( +; CHECK-F32X2: { +; CHECK-F32X2-NEXT: .reg .b32 %r<3>; +; CHECK-F32X2-NEXT: .reg .b64 %rd<2>; +; CHECK-F32X2-EMPTY: +; CHECK-F32X2-NEXT: // %bb.0: +; CHECK-F32X2-NEXT: ld.param.b32 %r1, [test_insertelement_param_1]; +; CHECK-F32X2-NEXT: ld.param.b64 %rd1, [test_insertelement_param_0]; +; CHECK-F32X2-NEXT: mov.b64 {%r2, _}, %rd1; +; CHECK-F32X2-NEXT: st.param.v2.b32 [func_retval0], {%r2, %r1}; +; CHECK-F32X2-NEXT: ret; %i = insertelement <2 x float> %a, float %x, i64 1 ret <2 x float> %i } @@ -1957,6 +2108,43 @@ define <2 x float> @test_uitofp_2xi32_to_2xfloat(<2 x i32> %a) #0 { ret <2 x float> %r } +define void @test_trunc_to_v2bf16(<2 x float> %a, ptr %p) { +; CHECK-LABEL: test_trunc_to_v2bf16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2bf16_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2bf16_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: cvt.rn.bf16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.b32 [%rd2], %r3; +; CHECK-NEXT: ret; + %trunc = fptrunc <2 x float> %a to <2 x bfloat> + store <2 x bfloat> %trunc, ptr %p + ret void +} + +define void @test_trunc_to_v2f16(<2 x float> %a, ptr %p) { +; CHECK-LABEL: test_trunc_to_v2f16( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-NEXT: .reg .b64 %rd<3>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_trunc_to_v2f16_param_1]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_trunc_to_v2f16_param_0]; +; CHECK-NEXT: mov.b64 {%r1, %r2}, %rd1; +; CHECK-NEXT: cvt.rn.f16x2.f32 %r3, %r2, %r1; +; CHECK-NEXT: st.b32 [%rd2], %r3; +; CHECK-NEXT: ret; + %trunc = fptrunc <2 x float> %a to <2 x half> + store <2 x half> %trunc, ptr %p + ret void +} + + attributes #0 = { nounwind } attributes #1 = { "unsafe-fp-math" = "true" } attributes #2 = { "denormal-fp-math"="preserve-sign" } |