diff options
Diffstat (limited to 'llvm/test/CodeGen/Thumb2/mve-float32regloops.ll')
-rw-r--r-- | llvm/test/CodeGen/Thumb2/mve-float32regloops.ll | 100 |
1 file changed, 49 insertions, 51 deletions
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index 28166e4..f7b4548 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -995,46 +995,44 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} -; CHECK-NEXT: .pad #32 -; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: .pad #24 +; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: ldrh r6, [r0] -; CHECK-NEXT: movs r5, #1 -; CHECK-NEXT: ldrd r4, r10, [r0, #4] +; CHECK-NEXT: movs r4, #1 +; CHECK-NEXT: ldrd r7, r10, [r0, #4] ; CHECK-NEXT: sub.w r0, r6, #8 ; CHECK-NEXT: add.w r3, r0, r0, lsr #29 ; CHECK-NEXT: and r0, r0, #7 -; CHECK-NEXT: asrs r7, r3, #3 -; CHECK-NEXT: cmp r7, #1 +; CHECK-NEXT: asrs r5, r3, #3 +; CHECK-NEXT: cmp r5, #1 ; CHECK-NEXT: it gt -; CHECK-NEXT: asrgt r5, r3, #3 -; CHECK-NEXT: add.w r3, r4, r6, lsl #2 +; CHECK-NEXT: asrgt r4, r3, #3 +; CHECK-NEXT: add.w r3, r7, r6, lsl #2 ; CHECK-NEXT: sub.w r9, r3, #4 ; CHECK-NEXT: rsbs r3, r6, #0 -; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill -; CHECK-NEXT: add.w r3, r10, #32 -; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill -; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill +; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: b .LBB16_6 ; CHECK-NEXT: .LBB16_3: @ %while.end.loopexit ; CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: add.w r4, r4, r0, lsl #2 +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: add.w r7, r7, r0, lsl #2 ; CHECK-NEXT: b .LBB16_5 ; CHECK-NEXT: .LBB16_4: @ %for.end ; 
CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1 -; CHECK-NEXT: ldr r1, [sp, #28] @ 4-byte Reload -; CHECK-NEXT: ldrd r0, r9, [sp, #20] @ 8-byte Folded Reload +; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldrd r0, r9, [sp, #12] @ 8-byte Folded Reload ; CHECK-NEXT: wls lr, r0, .LBB16_5 ; CHECK-NEXT: b .LBB16_10 ; CHECK-NEXT: .LBB16_5: @ %while.end ; CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: subs.w r12, r12, #1 ; CHECK-NEXT: vstrb.8 q0, [r2], #16 -; CHECK-NEXT: add.w r0, r4, r0, lsl #2 -; CHECK-NEXT: add.w r4, r0, #16 +; CHECK-NEXT: add.w r0, r7, r0, lsl #2 +; CHECK-NEXT: add.w r7, r0, #16 ; CHECK-NEXT: beq .LBB16_12 ; CHECK-NEXT: .LBB16_6: @ %while.body ; CHECK-NEXT: @ =>This Loop Header: Depth=1 @@ -1042,76 +1040,76 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no ; CHECK-NEXT: @ Child Loop BB16_11 Depth 2 ; CHECK-NEXT: add.w lr, r10, #8 ; CHECK-NEXT: vldrw.u32 q0, [r1], #16 -; CHECK-NEXT: ldrd r3, r7, [r10] +; CHECK-NEXT: ldrd r3, r4, [r10] ; CHECK-NEXT: ldm.w lr, {r0, r5, r6, lr} ; CHECK-NEXT: ldrd r11, r8, [r10, #24] ; CHECK-NEXT: vstrb.8 q0, [r9], #16 -; CHECK-NEXT: vldrw.u32 q0, [r4], #32 -; CHECK-NEXT: str r1, [sp, #28] @ 4-byte Spill -; CHECK-NEXT: str.w r9, [sp, #24] @ 4-byte Spill -; CHECK-NEXT: vldrw.u32 q1, [r4, #-28] +; CHECK-NEXT: vldrw.u32 q0, [r7], #32 +; CHECK-NEXT: str r1, [sp, #20] @ 4-byte Spill +; CHECK-NEXT: str.w r9, [sp, #16] @ 4-byte Spill +; CHECK-NEXT: vldrw.u32 q1, [r7, #-28] ; CHECK-NEXT: vmul.f32 q0, q0, r3 -; CHECK-NEXT: vldrw.u32 q6, [r4, #-24] -; CHECK-NEXT: vldrw.u32 q4, [r4, #-20] -; CHECK-NEXT: vfma.f32 q0, q1, r7 -; CHECK-NEXT: vldrw.u32 q5, [r4, #-16] +; CHECK-NEXT: vldrw.u32 q6, [r7, #-24] +; CHECK-NEXT: vldrw.u32 q4, [r7, #-20] +; CHECK-NEXT: vfma.f32 q0, q1, r4 +; CHECK-NEXT: vldrw.u32 q5, [r7, #-16] ; CHECK-NEXT: vfma.f32 q0, q6, r0 -; CHECK-NEXT: vldrw.u32 q2, 
[r4, #-12] +; CHECK-NEXT: vldrw.u32 q2, [r7, #-12] ; CHECK-NEXT: vfma.f32 q0, q4, r5 -; CHECK-NEXT: vldrw.u32 q3, [r4, #-8] +; CHECK-NEXT: vldrw.u32 q3, [r7, #-8] ; CHECK-NEXT: vfma.f32 q0, q5, r6 -; CHECK-NEXT: vldrw.u32 q1, [r4, #-4] +; CHECK-NEXT: vldrw.u32 q1, [r7, #-4] ; CHECK-NEXT: vfma.f32 q0, q2, lr -; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: vfma.f32 q0, q3, r11 ; CHECK-NEXT: vfma.f32 q0, q1, r8 ; CHECK-NEXT: cmp r0, #16 ; CHECK-NEXT: blo .LBB16_9 ; CHECK-NEXT: @ %bb.7: @ %for.body.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1 -; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: add.w r4, r10, #32 ; CHECK-NEXT: dls lr, r0 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: .LBB16_8: @ %for.body ; CHECK-NEXT: @ Parent Loop BB16_6 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldm.w r7, {r0, r3, r5, r6, r8, r11} -; CHECK-NEXT: vldrw.u32 q1, [r4], #32 -; CHECK-NEXT: vldrw.u32 q6, [r4, #-24] -; CHECK-NEXT: vldrw.u32 q4, [r4, #-20] +; CHECK-NEXT: ldm.w r4, {r0, r3, r5, r6, r8, r11} +; CHECK-NEXT: vldrw.u32 q1, [r7], #32 +; CHECK-NEXT: vldrw.u32 q6, [r7, #-24] +; CHECK-NEXT: vldrw.u32 q4, [r7, #-20] ; CHECK-NEXT: vfma.f32 q0, q1, r0 -; CHECK-NEXT: vldrw.u32 q1, [r4, #-28] -; CHECK-NEXT: vldrw.u32 q5, [r4, #-16] -; CHECK-NEXT: vldrw.u32 q2, [r4, #-12] +; CHECK-NEXT: vldrw.u32 q1, [r7, #-28] +; CHECK-NEXT: vldrw.u32 q5, [r7, #-16] +; CHECK-NEXT: vldrw.u32 q2, [r7, #-12] ; CHECK-NEXT: vfma.f32 q0, q1, r3 -; CHECK-NEXT: ldrd r9, r1, [r7, #24] +; CHECK-NEXT: ldrd r9, r1, [r4, #24] ; CHECK-NEXT: vfma.f32 q0, q6, r5 -; CHECK-NEXT: vldrw.u32 q3, [r4, #-8] +; CHECK-NEXT: vldrw.u32 q3, [r7, #-8] ; CHECK-NEXT: vfma.f32 q0, q4, r6 -; CHECK-NEXT: vldrw.u32 q1, [r4, #-4] +; CHECK-NEXT: vldrw.u32 q1, [r7, #-4] ; CHECK-NEXT: vfma.f32 q0, q5, r8 -; CHECK-NEXT: adds r7, #32 +; CHECK-NEXT: adds r4, #32 ; 
CHECK-NEXT: vfma.f32 q0, q2, r11 ; CHECK-NEXT: vfma.f32 q0, q3, r9 ; CHECK-NEXT: vfma.f32 q0, q1, r1 ; CHECK-NEXT: le lr, .LBB16_8 ; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_9: @ in Loop: Header=BB16_6 Depth=1 -; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload +; CHECK-NEXT: add.w r4, r10, #32 ; CHECK-NEXT: b .LBB16_4 ; CHECK-NEXT: .LBB16_10: @ %while.body76.preheader ; CHECK-NEXT: @ in Loop: Header=BB16_6 Depth=1 -; CHECK-NEXT: mov r3, r4 +; CHECK-NEXT: mov r3, r7 ; CHECK-NEXT: .LBB16_11: @ %while.body76 ; CHECK-NEXT: @ Parent Loop BB16_6 Depth=1 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldr r0, [r7], #4 +; CHECK-NEXT: ldr r0, [r4], #4 ; CHECK-NEXT: vldrw.u32 q1, [r3], #4 ; CHECK-NEXT: vfma.f32 q0, q1, r0 ; CHECK-NEXT: le lr, .LBB16_11 ; CHECK-NEXT: b .LBB16_3 ; CHECK-NEXT: .LBB16_12: -; CHECK-NEXT: add sp, #32 +; CHECK-NEXT: add sp, #24 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} |