diff options
Diffstat (limited to 'llvm/test/CodeGen/Thumb2/mve-float32regloops.ll')
| -rw-r--r-- | llvm/test/CodeGen/Thumb2/mve-float32regloops.ll | 211 |
1 files changed, 102 insertions, 109 deletions
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index f7b4548..b6657d6 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -1573,120 +1573,115 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly % ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: ldrd r7, r9, [r0] -; CHECK-NEXT: and r6, r3, #3 -; CHECK-NEXT: ldr r0, [r0, #8] -; CHECK-NEXT: lsrs r3, r3, #2 -; CHECK-NEXT: @ implicit-def: $r12 -; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp] @ 4-byte Spill -; CHECK-NEXT: str r2, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: ldm.w r0, {r7, r9, r11} +; CHECK-NEXT: and r0, r3, #3 +; CHECK-NEXT: @ implicit-def: $r5 +; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: lsrs r0, r3, #2 +; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: b .LBB19_3 ; CHECK-NEXT: .LBB19_1: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: mov r2, r5 -; CHECK-NEXT: mov r4, r11 -; CHECK-NEXT: mov r8, r10 +; CHECK-NEXT: mov r8, r3 +; CHECK-NEXT: mov r3, r12 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r12, r10 ; CHECK-NEXT: .LBB19_2: @ %if.end69 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 ; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: adds r0, #128 -; CHECK-NEXT: strd r2, r4, [r9] -; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: subs r7, #1 -; CHECK-NEXT: strd r3, r8, [r9, #8] -; CHECK-NEXT: add.w r9, r9, #16 +; CHECK-NEXT: add.w r11, r11, #128 +; CHECK-NEXT: strd r8, r0, [r9] ; CHECK-NEXT: mov r1, r2 +; CHECK-NEXT: strd r3, r12, [r9, #8] +; CHECK-NEXT: add.w r9, r9, #16 +; CHECK-NEXT: subs r7, #1 ; CHECK-NEXT: beq.w .LBB19_13 ; CHECK-NEXT: .LBB19_3: @ %do.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB19_5 Depth 2 -; CHECK-NEXT: ldrd r5, r11, [r9] +; CHECK-NEXT: ldr.w r10, [r9, #12] ; CHECK-NEXT: mov r6, r2 -; CHECK-NEXT: ldrd r8, r10, [r9, #8] -; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload +; CHECK-NEXT: ldm.w r9, {r3, r4, r12} +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: wls lr, r2, .LBB19_6 +; CHECK-NEXT: wls lr, r0, .LBB19_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.lr.ph ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload -; CHECK-NEXT: mov r4, r11 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: mov r6, r2 ; CHECK-NEXT: .LBB19_5: @ %while.body ; CHECK-NEXT: @ Parent Loop BB19_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldr r5, [r1, #12] -; CHECK-NEXT: vldrw.u32 q2, [r0] -; CHECK-NEXT: vldrw.u32 q6, [r0, #16] -; CHECK-NEXT: ldm.w r1, {r2, r7, r11} -; CHECK-NEXT: vmul.f32 q2, q2, r5 -; CHECK-NEXT: vldrw.u32 q7, [r0, #32] -; CHECK-NEXT: vfma.f32 q2, q6, r11 -; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: ldrd r4, r3, [r1, #8] +; CHECK-NEXT: vldrw.u32 q2, [r11] +; CHECK-NEXT: vldrw.u32 q6, [r11, #16] +; CHECK-NEXT: ldrd r0, r7, [r1] +; CHECK-NEXT: vmul.f32 q2, q2, r3 +; CHECK-NEXT: vldrw.u32 q7, [r11, #32] +; CHECK-NEXT: vfma.f32 q2, q6, r4 +; CHECK-NEXT: vldrw.u32 q4, [r11, #48] ; CHECK-NEXT: vfma.f32 q2, q7, r7 -; CHECK-NEXT: vldrw.u32 q5, [r0, #64] -; CHECK-NEXT: vfma.f32 q2, q4, r2 -; CHECK-NEXT: vldrw.u32 q3, [r0, #80] -; CHECK-NEXT: vfma.f32 q2, q5, r3 -; CHECK-NEXT: vldrw.u32 q1, [r0, #96] -; CHECK-NEXT: vfma.f32 q2, q3, r4 -; CHECK-NEXT: vldrw.u32 q0, [r0, #112] -; CHECK-NEXT: vfma.f32 q2, q1, r8 +; CHECK-NEXT: vldrw.u32 q5, [r11, #64] +; CHECK-NEXT: vfma.f32 q2, q4, r0 +; CHECK-NEXT: vldrw.u32 q3, [r11, #80] +; CHECK-NEXT: vfma.f32 q2, q5, r5 +; CHECK-NEXT: vldrw.u32 q1, [r11, #96] +; CHECK-NEXT: vfma.f32 q2, q3, r8 +; CHECK-NEXT: vldrw.u32 q0, [r11, #112] +; CHECK-NEXT: vfma.f32 q2, q1, r12 ; CHECK-NEXT: adds r1, #16 ; CHECK-NEXT: vfma.f32 q2, q0, r10 -; CHECK-NEXT: mov r4, r11 -; CHECK-NEXT: vmov r10, r8, d5 +; CHECK-NEXT: mov r5, r3 +; CHECK-NEXT: vmov r10, r12, d5 ; CHECK-NEXT: vstrb.8 q2, [r6], #16 -; CHECK-NEXT: mov r3, r5 -; CHECK-NEXT: mov r12, r5 ; CHECK-NEXT: le lr, .LBB19_5 ; CHECK-NEXT: .LBB19_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: cmp r7, #0 ; CHECK-NEXT: beq .LBB19_1 ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: ldrd lr, r4, [r1] -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: ldrd r2, r1, [r1, #8] -; CHECK-NEXT: vldrw.u32 q6, [r0, #16] -; CHECK-NEXT: vldrw.u32 q7, [r0, #32] -; CHECK-NEXT: vldrw.u32 q4, [r0, #48] +; CHECK-NEXT: ldrd lr, r0, [r1] +; CHECK-NEXT: vldrw.u32 q0, [r11] +; CHECK-NEXT: ldrd r8, r1, [r1, #8] +; CHECK-NEXT: vldrw.u32 q6, [r11, #16] +; CHECK-NEXT: vldrw.u32 q7, [r11, #32] +; CHECK-NEXT: vldrw.u32 q4, [r11, #48] ; CHECK-NEXT: vmul.f32 q0, q0, r1 -; CHECK-NEXT: vldrw.u32 q5, [r0, #64] -; CHECK-NEXT: vfma.f32 q0, q6, r2 -; CHECK-NEXT: vldrw.u32 q3, [r0, #80] -; CHECK-NEXT: vfma.f32 q0, q7, r4 -; CHECK-NEXT: vldrw.u32 q2, [r0, #96] +; CHECK-NEXT: vldrw.u32 q5, [r11, #64] +; CHECK-NEXT: vfma.f32 q0, q6, r8 +; CHECK-NEXT: vldrw.u32 q3, [r11, #80] +; CHECK-NEXT: vfma.f32 q0, q7, r0 +; CHECK-NEXT: vldrw.u32 q2, [r11, #96] ; CHECK-NEXT: vfma.f32 q0, q4, lr -; CHECK-NEXT: vldrw.u32 q1, [r0, #112] -; CHECK-NEXT: vfma.f32 q0, q5, r5 -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: vfma.f32 q0, q3, r11 -; CHECK-NEXT: vfma.f32 q0, q2, r8 +; CHECK-NEXT: vldrw.u32 q1, [r11, #112] +; CHECK-NEXT: vfma.f32 q0, q5, r3 +; CHECK-NEXT: cmp r7, #1 +; CHECK-NEXT: vfma.f32 q0, q3, r4 +; CHECK-NEXT: vfma.f32 q0, q2, r12 ; CHECK-NEXT: vfma.f32 q0, q1, r10 -; CHECK-NEXT: vmov r5, s0 +; CHECK-NEXT: vmov r4, s0 ; CHECK-NEXT: bne .LBB19_9 ; CHECK-NEXT: @ %bb.8: @ %if.then58 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: str r5, [r6] -; CHECK-NEXT: mov r2, lr -; CHECK-NEXT: mov r4, r12 -; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: str r4, [r6] +; CHECK-NEXT: mov r8, lr +; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r3, r4 ; CHECK-NEXT: b .LBB19_12 ; CHECK-NEXT: .LBB19_9: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: vmov r8, s1 -; CHECK-NEXT: cmp r3, #2 +; CHECK-NEXT: vmov r12, s1 +; CHECK-NEXT: cmp r7, #2 ; CHECK-NEXT: vstr s1, [r6, #4] -; CHECK-NEXT: str r5, [r6] +; CHECK-NEXT: str r4, [r6] ; CHECK-NEXT: bne .LBB19_11 ; CHECK-NEXT: @ %bb.10: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: mov r2, r4 -; CHECK-NEXT: mov r3, r8 -; CHECK-NEXT: mov r4, lr -; CHECK-NEXT: mov r8, r5 +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r3, r12 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r12, r4 ; CHECK-NEXT: b .LBB19_12 ; CHECK-NEXT: .LBB19_11: @ %if.else64 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 @@ -1694,7 +1689,7 @@ define arm_aapcs_vfpcc void @arm_biquad_cascade_df1_f32(ptr nocapture readonly % ; CHECK-NEXT: vstr s2, [r6, #8] ; CHECK-NEXT: .LBB19_12: @ %if.end69 ; CHECK-NEXT: @ in Loop: Header=BB19_3 Depth=1 -; CHECK-NEXT: mov r12, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: b .LBB19_2 ; CHECK-NEXT: .LBB19_13: @ %do.end ; CHECK-NEXT: add sp, #16 @@ -1901,8 +1896,8 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: ldrd r6, r12, [r0, #4] ; CHECK-NEXT: lsr.w r8, r3, #1 ; CHECK-NEXT: ldrb r0, [r0] @@ -1910,11 +1905,11 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur ; CHECK-NEXT: b .LBB20_3 ; CHECK-NEXT: .LBB20_1: @ %if.else ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: vmov.f32 s14, s13 -; CHECK-NEXT: vstr s12, [r6] +; CHECK-NEXT: vmov.f32 s6, s5 +; CHECK-NEXT: vstr s4, [r6] ; CHECK-NEXT: .LBB20_2: @ %if.end ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: vstr s14, [r6, #4] +; CHECK-NEXT: vstr s6, [r6, #4] ; CHECK-NEXT: add.w r12, r12, #20 ; CHECK-NEXT: adds r6, #8 ; CHECK-NEXT: subs r0, #1 @@ -1923,41 +1918,39 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur ; CHECK-NEXT: .LBB20_3: @ %do.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB20_5 Depth 2 -; CHECK-NEXT: vldrw.u32 q2, [r12] +; CHECK-NEXT: vldrw.u32 q3, [r12] ; CHECK-NEXT: movs r5, #0 -; CHECK-NEXT: vmov q4, q2 +; CHECK-NEXT: vmov q4, q3 ; CHECK-NEXT: vshlc q4, r5, #32 -; CHECK-NEXT: vldrw.u32 q1, [r12, #8] -; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vldrw.u32 q2, [r12, #8] +; CHECK-NEXT: vmov q5, q2 ; CHECK-NEXT: vshlc q5, r5, #32 -; CHECK-NEXT: vldrw.u32 q3, [r6] -; CHECK-NEXT: vmov.f32 s14, s0 +; CHECK-NEXT: vldrw.u32 q1, [r6] +; CHECK-NEXT: vmov.f32 s6, s0 ; CHECK-NEXT: mov r5, r2 -; CHECK-NEXT: vmov.f32 s15, s0 +; CHECK-NEXT: vmov.f32 s7, s0 ; CHECK-NEXT: wls lr, r8, .LBB20_6 ; CHECK-NEXT: @ %bb.4: @ %while.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 -; CHECK-NEXT: vmov q6, q3 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: .LBB20_5: @ %while.body ; CHECK-NEXT: @ Parent Loop BB20_3 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 ; CHECK-NEXT: ldrd r7, r4, [r1], #8 -; CHECK-NEXT: vfma.f32 q6, q2, r7 -; CHECK-NEXT: vmov r7, s24 -; CHECK-NEXT: vmov q3, q6 -; CHECK-NEXT: vfma.f32 q3, q1, r7 -; CHECK-NEXT: vstr s24, [r5] -; CHECK-NEXT: vmov.f32 s15, s0 -; CHECK-NEXT: vfma.f32 q3, q4, r4 -; CHECK-NEXT: vmov r4, s13 -; CHECK-NEXT: vstr s13, [r5, #4] -; CHECK-NEXT: vfma.f32 q3, q5, r4 +; CHECK-NEXT: vfma.f32 q1, q3, r7 +; CHECK-NEXT: vmov r7, s4 +; CHECK-NEXT: vmov.f32 s2, s4 +; CHECK-NEXT: vfma.f32 q1, q2, r7 +; CHECK-NEXT: vmov.f32 s7, s0 +; CHECK-NEXT: vfma.f32 q1, q4, r4 +; CHECK-NEXT: vmov r4, s5 +; CHECK-NEXT: vstr s5, [r5, #4] +; CHECK-NEXT: vfma.f32 q1, q5, r4 +; CHECK-NEXT: vmov.f32 s4, s6 +; CHECK-NEXT: vmov.f32 s5, s7 +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vstr s2, [r5] ; CHECK-NEXT: adds r5, #8 -; CHECK-NEXT: vmov.f32 s12, s14 -; CHECK-NEXT: vmov.f32 s13, s15 -; CHECK-NEXT: vmov.f32 s14, s0 -; CHECK-NEXT: vmov q6, q3 ; CHECK-NEXT: le lr, .LBB20_5 ; CHECK-NEXT: .LBB20_6: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 @@ -1966,14 +1959,14 @@ define void @arm_biquad_cascade_df2T_f32(ptr nocapture readonly %S, ptr nocaptur ; CHECK-NEXT: @ %bb.7: @ %if.then ; CHECK-NEXT: @ in Loop: Header=BB20_3 Depth=1 ; CHECK-NEXT: ldr r1, [r1] -; CHECK-NEXT: vfma.f32 q3, q2, r1 -; CHECK-NEXT: vmov r1, s12 -; CHECK-NEXT: vstr s12, [r5] -; CHECK-NEXT: vfma.f32 q3, q1, r1 -; CHECK-NEXT: vstr s13, [r6] +; CHECK-NEXT: vfma.f32 q1, q3, r1 +; CHECK-NEXT: vmov r1, s4 +; CHECK-NEXT: vstr s4, [r5] +; CHECK-NEXT: vfma.f32 q1, q2, r1 +; CHECK-NEXT: vstr s5, [r6] ; CHECK-NEXT: b .LBB20_2 ; CHECK-NEXT: .LBB20_8: @ %do.end -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: @ %bb.9: |
