aboutsummaryrefslogtreecommitdiff
path: root/llvm
diff options
context:
space:
mode:
author: David Green <david.green@arm.com> 2024-07-23 16:34:09 +0100
committer: GitHub <noreply@github.com> 2024-07-23 16:34:09 +0100
commit: b42fe6740ec696dca0e3dc914d2638088caa3f53 (patch)
tree: 3186437b2b10b2d70a3747946c620df97ca07bce /llvm
parent: 1a3cfe5b9dc9c80a375506262b54b51d929df52d (diff)
download: llvm-b42fe6740ec696dca0e3dc914d2638088caa3f53.zip
llvm-b42fe6740ec696dca0e3dc914d2638088caa3f53.tar.gz
llvm-b42fe6740ec696dca0e3dc914d2638088caa3f53.tar.bz2
[DAG] Add users of operand of simplified extract_vector_elt to worklist (#100074)
This helps ensure that we revisit the last remaining extract_vector_elt uses of a node, so that the node can be optimized away in cases such as extract(insert(scalartovec(x), 1), 0).
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 1
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll 4
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-vabs.ll 37
-rw-r--r-- llvm/test/CodeGen/AArch64/cmp-select-sign.ll 27
-rw-r--r-- llvm/test/CodeGen/AArch64/fptoi.ll 542
-rw-r--r-- llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll 474
-rw-r--r-- llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll 362
-rw-r--r-- llvm/test/CodeGen/AArch64/nontemporal-load.ll 65
-rw-r--r-- llvm/test/CodeGen/AArch64/sadd_sat_vec.ll 15
-rw-r--r-- llvm/test/CodeGen/AArch64/ssub_sat_vec.ll 15
-rw-r--r-- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll 11
-rw-r--r-- llvm/test/CodeGen/AArch64/usub_sat_vec.ll 11
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll 560
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll 461
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-minmaxi.ll 16
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-vst3.ll 20
-rw-r--r-- llvm/test/CodeGen/Thumb2/mve-vst4.ll 72
17 files changed, 1206 insertions, 1487 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa9032e..cd04400 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -22533,6 +22533,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
Index == VecOp.getOperand(2)) {
SDValue Elt = VecOp.getOperand(1);
+ AddUsersToWorklist(VecOp.getNode());
return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
}
diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
index 00cc6b2..abf2e12 100644
--- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll
@@ -951,10 +951,8 @@ define <1 x i128> @sext_v1x64(<1 x i64> %arg) {
; CHECK-SD-LABEL: sext_v1x64:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-SD-NEXT: fmov x8, d0
-; CHECK-SD-NEXT: asr x1, x8, #63
-; CHECK-SD-NEXT: mov.d v0[1], x1
; CHECK-SD-NEXT: fmov x0, d0
+; CHECK-SD-NEXT: asr x1, x0, #63
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: sext_v1x64:
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index 178c229..62a79e3 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -1802,28 +1802,25 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
; CHECK-NEXT: mov.d x8, v0[1]
; CHECK-NEXT: mov.d x9, v1[1]
; CHECK-NEXT: fmov x10, d0
-; CHECK-NEXT: fmov x11, d1
-; CHECK-NEXT: asr x12, x10, #63
-; CHECK-NEXT: asr x13, x11, #63
-; CHECK-NEXT: subs x10, x10, x11
+; CHECK-NEXT: fmov x12, d1
+; CHECK-NEXT: asr x14, x10, #63
; CHECK-NEXT: asr x11, x8, #63
-; CHECK-NEXT: asr x14, x9, #63
-; CHECK-NEXT: sbc x12, x12, x13
+; CHECK-NEXT: asr x13, x9, #63
+; CHECK-NEXT: asr x15, x12, #63
; CHECK-NEXT: subs x8, x8, x9
-; CHECK-NEXT: sbc x9, x11, x14
-; CHECK-NEXT: asr x13, x12, #63
-; CHECK-NEXT: asr x11, x9, #63
-; CHECK-NEXT: eor x10, x10, x13
-; CHECK-NEXT: eor x8, x8, x11
-; CHECK-NEXT: eor x9, x9, x11
-; CHECK-NEXT: subs x2, x8, x11
-; CHECK-NEXT: eor x8, x12, x13
-; CHECK-NEXT: sbc x3, x9, x11
-; CHECK-NEXT: subs x9, x10, x13
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: sbc x1, x8, x13
-; CHECK-NEXT: mov.d v0[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: sbc x9, x11, x13
+; CHECK-NEXT: subs x10, x10, x12
+; CHECK-NEXT: sbc x11, x14, x15
+; CHECK-NEXT: asr x13, x9, #63
+; CHECK-NEXT: asr x12, x11, #63
+; CHECK-NEXT: eor x8, x8, x13
+; CHECK-NEXT: eor x9, x9, x13
+; CHECK-NEXT: eor x10, x10, x12
+; CHECK-NEXT: eor x11, x11, x12
+; CHECK-NEXT: subs x0, x10, x12
+; CHECK-NEXT: sbc x1, x11, x12
+; CHECK-NEXT: subs x2, x8, x13
+; CHECK-NEXT: sbc x3, x9, x13
; CHECK-NEXT: ret
%aext = sext <2 x i64> %a to <2 x i128>
%bext = sext <2 x i64> %b to <2 x i128>
diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
index 22440b7..b4f179e 100644
--- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll
@@ -241,21 +241,18 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) {
define <4 x i65> @sign_4xi65(<4 x i65> %a) {
; CHECK-LABEL: sign_4xi65:
; CHECK: // %bb.0:
-; CHECK-NEXT: sbfx x8, x1, #0, #1
-; CHECK-NEXT: sbfx x9, x5, #0, #1
-; CHECK-NEXT: sbfx x10, x3, #0, #1
-; CHECK-NEXT: lsr x1, x8, #63
-; CHECK-NEXT: orr x8, x8, #0x1
-; CHECK-NEXT: lsr x3, x10, #63
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: sbfx x8, x7, #0, #1
-; CHECK-NEXT: lsr x5, x9, #63
-; CHECK-NEXT: orr x2, x10, #0x1
-; CHECK-NEXT: orr x4, x9, #0x1
-; CHECK-NEXT: lsr x7, x8, #63
-; CHECK-NEXT: orr x6, x8, #0x1
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: sbfx x8, x5, #0, #1
+; CHECK-NEXT: sbfx x9, x3, #0, #1
+; CHECK-NEXT: sbfx x10, x1, #0, #1
+; CHECK-NEXT: sbfx x11, x7, #0, #1
+; CHECK-NEXT: lsr x1, x10, #63
+; CHECK-NEXT: lsr x3, x9, #63
+; CHECK-NEXT: lsr x5, x8, #63
+; CHECK-NEXT: lsr x7, x11, #63
+; CHECK-NEXT: orr x0, x10, #0x1
+; CHECK-NEXT: orr x2, x9, #0x1
+; CHECK-NEXT: orr x4, x8, #0x1
+; CHECK-NEXT: orr x6, x11, #0x1
; CHECK-NEXT: ret
%c = icmp sgt <4 x i65> %a, <i65 -1, i65 -1, i65 -1, i65 -1>
%res = select <4 x i1> %c, <4 x i65> <i65 1, i65 1, i65 1, i65 1>, <4 x i65 > <i65 -1, i65 -1, i65 -1, i65 -1>
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 4723ac0..0c88059 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -2287,20 +2287,19 @@ define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: bl __fixdfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -2345,20 +2344,19 @@ define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w20, -16
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov d0, v0.d[1]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT: bl __fixunsdfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: mov d0, v0.d[1]
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -2407,28 +2405,26 @@ define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: .cfi_offset b8, -56
; CHECK-SD-NEXT: .cfi_offset b9, -64
-; CHECK-SD-NEXT: fmov d9, d0
-; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d8, d2
+; CHECK-SD-NEXT: fmov d9, d1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, d8
+; CHECK-SD-NEXT: fmov d0, d9
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, d9
+; CHECK-SD-NEXT: fmov d0, d8
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: bl __fixdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
@@ -2488,28 +2484,26 @@ define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: .cfi_offset b8, -56
; CHECK-SD-NEXT: .cfi_offset b9, -64
-; CHECK-SD-NEXT: fmov d9, d0
-; CHECK-SD-NEXT: fmov d0, d1
; CHECK-SD-NEXT: fmov d8, d2
+; CHECK-SD-NEXT: fmov d9, d1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, d8
+; CHECK-SD-NEXT: fmov d0, d9
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, d9
+; CHECK-SD-NEXT: fmov d0, d8
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
; CHECK-SD-NEXT: bl __fixunsdfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
@@ -3694,20 +3688,19 @@ define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -3754,20 +3747,19 @@ define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixunssfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -3822,23 +3814,22 @@ define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) {
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixsfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixsfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x21
-; CHECK-SD-NEXT: mov x3, x22
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x21
+; CHECK-SD-NEXT: mov x1, x22
; CHECK-SD-NEXT: mov x4, x19
; CHECK-SD-NEXT: mov x5, x20
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -3904,23 +3895,22 @@ define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) {
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov s0, v0.s[1]
+; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-SD-NEXT: bl __fixunssfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-SD-NEXT: mov s0, v0.s[1]
; CHECK-SD-NEXT: bl __fixunssfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x21
-; CHECK-SD-NEXT: mov x3, x22
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x21
+; CHECK-SD-NEXT: mov x1, x22
; CHECK-SD-NEXT: mov x4, x19
; CHECK-SD-NEXT: mov x5, x20
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -7034,20 +7024,19 @@ define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixhfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -7089,20 +7078,19 @@ define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -32
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixunshfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
+; CHECK-SD-NEXT: mov x2, x0
+; CHECK-SD-NEXT: mov x3, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #48
; CHECK-SD-NEXT: ret
;
@@ -7147,28 +7135,27 @@ define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixhfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: bl __fixhfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -7220,28 +7207,27 @@ define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) {
; CHECK-SD-NEXT: .cfi_offset w30, -48
; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov h0, v0.h[1]
+; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: mov h0, v0.h[2]
+; CHECK-SD-NEXT: mov h0, v0.h[1]
; CHECK-SD-NEXT: bl __fixunshfti
; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-SD-NEXT: mov x21, x0
; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: // kill: def $h0 killed $h0 killed $q0
+; CHECK-SD-NEXT: mov h0, v0.h[2]
; CHECK-SD-NEXT: bl __fixunshfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-SD-NEXT: mov x4, x0
+; CHECK-SD-NEXT: mov x5, x1
+; CHECK-SD-NEXT: mov x0, x19
+; CHECK-SD-NEXT: mov x1, x20
+; CHECK-SD-NEXT: mov x2, x21
+; CHECK-SD-NEXT: mov x3, x22
; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
+; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
; CHECK-SD-NEXT: add sp, sp, #64
; CHECK-SD-NEXT: ret
;
@@ -8083,260 +8069,136 @@ entry:
}
define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) {
-; CHECK-SD-LABEL: fptos_v2f128_v2i128:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w19, -8
-; CHECK-SD-NEXT: .cfi_offset w20, -16
-; CHECK-SD-NEXT: .cfi_offset w30, -32
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: bl __fixtfti
-; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov x19, x0
-; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: bl __fixtfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: add sp, sp, #48
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptos_v2f128_v2i128:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub sp, sp, #48
-; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
-; CHECK-GI-NEXT: .cfi_offset w19, -8
-; CHECK-GI-NEXT: .cfi_offset w20, -16
-; CHECK-GI-NEXT: .cfi_offset w30, -32
-; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __fixtfti
-; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov x19, x0
-; CHECK-GI-NEXT: mov x20, x1
-; CHECK-GI-NEXT: bl __fixtfti
-; CHECK-GI-NEXT: mov x2, x0
-; CHECK-GI-NEXT: mov x3, x1
-; CHECK-GI-NEXT: mov x0, x19
-; CHECK-GI-NEXT: mov x1, x20
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT: add sp, sp, #48
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptos_v2f128_v2i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl __fixtfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: bl __fixtfti
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x3, x1
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
entry:
%c = fptosi <2 x fp128> %a to <2 x i128>
ret <2 x i128> %c
}
define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) {
-; CHECK-SD-LABEL: fptou_v2f128_v2i128:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #48
-; CHECK-SD-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 48
-; CHECK-SD-NEXT: .cfi_offset w19, -8
-; CHECK-SD-NEXT: .cfi_offset w20, -16
-; CHECK-SD-NEXT: .cfi_offset w30, -32
-; CHECK-SD-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: bl __fixunstfti
-; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov x19, x0
-; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: bl __fixunstfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-SD-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: add sp, sp, #48
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptou_v2f128_v2i128:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub sp, sp, #48
-; CHECK-GI-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 48
-; CHECK-GI-NEXT: .cfi_offset w19, -8
-; CHECK-GI-NEXT: .cfi_offset w20, -16
-; CHECK-GI-NEXT: .cfi_offset w30, -32
-; CHECK-GI-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-GI-NEXT: bl __fixunstfti
-; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov x19, x0
-; CHECK-GI-NEXT: mov x20, x1
-; CHECK-GI-NEXT: bl __fixunstfti
-; CHECK-GI-NEXT: mov x2, x0
-; CHECK-GI-NEXT: mov x3, x1
-; CHECK-GI-NEXT: mov x0, x19
-; CHECK-GI-NEXT: mov x1, x20
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-GI-NEXT: add sp, sp, #48
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptou_v2f128_v2i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: bl __fixunstfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: bl __fixunstfti
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x3, x1
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
entry:
%c = fptoui <2 x fp128> %a to <2 x i128>
ret <2 x i128> %c
}
define <3 x i128> @fptos_v3f128_v3i128(<3 x fp128> %a) {
-; CHECK-SD-LABEL: fptos_v3f128_v3i128:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #80
-; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
-; CHECK-SD-NEXT: .cfi_offset w19, -8
-; CHECK-SD-NEXT: .cfi_offset w20, -16
-; CHECK-SD-NEXT: .cfi_offset w21, -24
-; CHECK-SD-NEXT: .cfi_offset w22, -32
-; CHECK-SD-NEXT: .cfi_offset w30, -48
-; CHECK-SD-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: bl __fixtfti
-; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov x19, x0
-; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: bl __fixtfti
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov x21, x0
-; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: bl __fixtfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: add sp, sp, #80
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptos_v3f128_v3i128:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub sp, sp, #80
-; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
-; CHECK-GI-NEXT: .cfi_offset w19, -8
-; CHECK-GI-NEXT: .cfi_offset w20, -16
-; CHECK-GI-NEXT: .cfi_offset w21, -24
-; CHECK-GI-NEXT: .cfi_offset w22, -32
-; CHECK-GI-NEXT: .cfi_offset w30, -48
-; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
-; CHECK-GI-NEXT: bl __fixtfti
-; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov x19, x0
-; CHECK-GI-NEXT: mov x20, x1
-; CHECK-GI-NEXT: bl __fixtfti
-; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov x21, x0
-; CHECK-GI-NEXT: mov x22, x1
-; CHECK-GI-NEXT: bl __fixtfti
-; CHECK-GI-NEXT: mov x4, x0
-; CHECK-GI-NEXT: mov x5, x1
-; CHECK-GI-NEXT: mov x0, x19
-; CHECK-GI-NEXT: mov x1, x20
-; CHECK-GI-NEXT: mov x2, x21
-; CHECK-GI-NEXT: mov x3, x22
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: add sp, sp, #80
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptos_v3f128_v3i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: bl __fixtfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: bl __fixtfti
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov x21, x0
+; CHECK-NEXT: mov x22, x1
+; CHECK-NEXT: bl __fixtfti
+; CHECK-NEXT: mov x4, x0
+; CHECK-NEXT: mov x5, x1
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
entry:
%c = fptosi <3 x fp128> %a to <3 x i128>
ret <3 x i128> %c
}
define <3 x i128> @fptou_v3f128_v3i128(<3 x fp128> %a) {
-; CHECK-SD-LABEL: fptou_v3f128_v3i128:
-; CHECK-SD: // %bb.0: // %entry
-; CHECK-SD-NEXT: sub sp, sp, #80
-; CHECK-SD-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-SD-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-SD-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-SD-NEXT: .cfi_def_cfa_offset 80
-; CHECK-SD-NEXT: .cfi_offset w19, -8
-; CHECK-SD-NEXT: .cfi_offset w20, -16
-; CHECK-SD-NEXT: .cfi_offset w21, -24
-; CHECK-SD-NEXT: .cfi_offset w22, -32
-; CHECK-SD-NEXT: .cfi_offset w30, -48
-; CHECK-SD-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill
-; CHECK-SD-NEXT: mov v0.16b, v1.16b
-; CHECK-SD-NEXT: bl __fixunstfti
-; CHECK-SD-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov x19, x0
-; CHECK-SD-NEXT: mov x20, x1
-; CHECK-SD-NEXT: bl __fixunstfti
-; CHECK-SD-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov x21, x0
-; CHECK-SD-NEXT: mov x22, x1
-; CHECK-SD-NEXT: bl __fixunstfti
-; CHECK-SD-NEXT: fmov d0, x0
-; CHECK-SD-NEXT: mov x2, x19
-; CHECK-SD-NEXT: mov x3, x20
-; CHECK-SD-NEXT: mov x4, x21
-; CHECK-SD-NEXT: mov x5, x22
-; CHECK-SD-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-SD-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-SD-NEXT: mov v0.d[1], x1
-; CHECK-SD-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-SD-NEXT: fmov x0, d0
-; CHECK-SD-NEXT: add sp, sp, #80
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: fptou_v3f128_v3i128:
-; CHECK-GI: // %bb.0: // %entry
-; CHECK-GI-NEXT: sub sp, sp, #80
-; CHECK-GI-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
-; CHECK-GI-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
-; CHECK-GI-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-GI-NEXT: .cfi_def_cfa_offset 80
-; CHECK-GI-NEXT: .cfi_offset w19, -8
-; CHECK-GI-NEXT: .cfi_offset w20, -16
-; CHECK-GI-NEXT: .cfi_offset w21, -24
-; CHECK-GI-NEXT: .cfi_offset w22, -32
-; CHECK-GI-NEXT: .cfi_offset w30, -48
-; CHECK-GI-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
-; CHECK-GI-NEXT: bl __fixunstfti
-; CHECK-GI-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov x19, x0
-; CHECK-GI-NEXT: mov x20, x1
-; CHECK-GI-NEXT: bl __fixunstfti
-; CHECK-GI-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-GI-NEXT: mov x21, x0
-; CHECK-GI-NEXT: mov x22, x1
-; CHECK-GI-NEXT: bl __fixunstfti
-; CHECK-GI-NEXT: mov x4, x0
-; CHECK-GI-NEXT: mov x5, x1
-; CHECK-GI-NEXT: mov x0, x19
-; CHECK-GI-NEXT: mov x1, x20
-; CHECK-GI-NEXT: mov x2, x21
-; CHECK-GI-NEXT: mov x3, x22
-; CHECK-GI-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
-; CHECK-GI-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-GI-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-GI-NEXT: add sp, sp, #80
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: fptou_v3f128_v3i128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w21, -24
+; CHECK-NEXT: .cfi_offset w22, -32
+; CHECK-NEXT: .cfi_offset w30, -48
+; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill
+; CHECK-NEXT: bl __fixunstfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mov x20, x1
+; CHECK-NEXT: bl __fixunstfti
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov x21, x0
+; CHECK-NEXT: mov x22, x1
+; CHECK-NEXT: bl __fixunstfti
+; CHECK-NEXT: mov x4, x0
+; CHECK-NEXT: mov x5, x1
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
entry:
%c = fptoui <3 x fp128> %a to <3 x i128>
ret <3 x i128> %c
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
index d620a885..91c8b7f 100644
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -819,47 +819,43 @@ define <2 x i100> @test_signed_v2f32_v2i100(<2 x float> %f) {
; CHECK-NEXT: .cfi_offset b9, -56
; CHECK-NEXT: .cfi_offset b10, -64
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v9.2s, #241, lsl #24
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: fmov s10, w8
+; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov s8, v0.s[1]
+; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x21, x1, lt
-; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: fcmp s0, s10
; CHECK-NEXT: csel x9, x22, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
+; CHECK-NEXT: fcmp s0, s0
+; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x19, xzr, x8, vs
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x21, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s0, s10
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x21, x1, lt
+; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x22, x8, gt
-; CHECK-NEXT: fcmp s0, s0
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: csel x9, x22, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: csel x2, xzr, x8, vs
+; CHECK-NEXT: csel x3, xzr, x9, vs
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
%x = call <2 x i100> @llvm.fptosi.sat.v2f32.v2i100(<2 x float> %f)
@@ -885,47 +881,43 @@ define <2 x i128> @test_signed_v2f32_v2i128(<2 x float> %f) {
; CHECK-NEXT: .cfi_offset b9, -56
; CHECK-NEXT: .cfi_offset b10, -64
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v9.2s, #255, lsl #24
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: fmov s10, w8
+; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov s8, v0.s[1]
+; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x21, x1, lt
-; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: fcmp s0, s10
; CHECK-NEXT: csel x9, x22, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp s8, s8
+; CHECK-NEXT: fcmp s0, s0
+; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x19, xzr, x8, vs
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x21, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s0, s10
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x21, x1, lt
+; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x22, x8, gt
-; CHECK-NEXT: fcmp s0, s0
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: csel x9, x22, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: fcmp s8, s8
; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: csel x2, xzr, x8, vs
+; CHECK-NEXT: csel x3, xzr, x9, vs
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
%x = call <2 x i128> @llvm.fptosi.sat.v2f32.v2i128(<2 x float> %f)
@@ -1068,15 +1060,15 @@ define <4 x i64> @test_signed_v4f32_v4i64(<4 x float> %f) {
define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i100:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #128
-; CHECK-NEXT: str d10, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #40] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
@@ -1089,28 +1081,40 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: .cfi_offset b8, -80
; CHECK-NEXT: .cfi_offset b9, -88
; CHECK-NEXT: .cfi_offset b10, -96
-; CHECK-NEXT: mov s8, v0.s[1]
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v9.2s, #241, lsl #24
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
-; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: mov s8, v0.s[1]
+; CHECK-NEXT: fcmp s0, s9
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x25, x1, lt
+; CHECK-NEXT: fcmp s0, s10
+; CHECK-NEXT: csel x9, x26, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: fcmp s0, s0
+; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: csel x19, xzr, x8, vs
+; CHECK-NEXT: csel x20, xzr, x9, vs
+; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: csel x9, x26, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x19, xzr, x8, vs
-; CHECK-NEXT: csel x20, xzr, x9, vs
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: csel x21, xzr, x8, vs
+; CHECK-NEXT: csel x22, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s0, s9
@@ -1122,48 +1126,32 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s0, s0
; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: csel x21, xzr, x8, vs
-; CHECK-NEXT: csel x22, xzr, x9, vs
+; CHECK-NEXT: csel x23, xzr, x8, vs
+; CHECK-NEXT: csel x24, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: csel x9, x26, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x23, xzr, x8, vs
-; CHECK-NEXT: csel x24, xzr, x9, vs
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: mov x4, x21
-; CHECK-NEXT: mov x5, x22
-; CHECK-NEXT: mov x6, x23
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x25, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s0, s10
-; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x26, x8, gt
-; CHECK-NEXT: fcmp s0, s0
-; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: csel x6, xzr, x8, vs
+; CHECK-NEXT: csel x7, xzr, x9, vs
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%x = call <4 x i100> @llvm.fptosi.sat.v4f32.v4i100(<4 x float> %f)
ret <4 x i100> %x
@@ -1172,15 +1160,15 @@ define <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-LABEL: test_signed_v4f32_v4i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #128
-; CHECK-NEXT: str d10, [sp, #32] // 8-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #40] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 128
+; CHECK-NEXT: sub sp, sp, #112
+; CHECK-NEXT: str d10, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 112
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
@@ -1193,28 +1181,40 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: .cfi_offset b8, -80
; CHECK-NEXT: .cfi_offset b9, -88
; CHECK-NEXT: .cfi_offset b10, -96
-; CHECK-NEXT: mov s8, v0.s[1]
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v9.2s, #255, lsl #24
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
-; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: mov s8, v0.s[1]
+; CHECK-NEXT: fcmp s0, s9
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x25, x1, lt
+; CHECK-NEXT: fcmp s0, s10
+; CHECK-NEXT: csel x9, x26, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: fcmp s0, s0
+; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: csel x19, xzr, x8, vs
+; CHECK-NEXT: csel x20, xzr, x9, vs
+; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: csel x9, x26, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x19, xzr, x8, vs
-; CHECK-NEXT: csel x20, xzr, x9, vs
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: csel x21, xzr, x8, vs
+; CHECK-NEXT: csel x22, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s0, s9
@@ -1226,48 +1226,32 @@ define <4 x i128> @test_signed_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s0, s0
; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: csel x21, xzr, x8, vs
-; CHECK-NEXT: csel x22, xzr, x9, vs
+; CHECK-NEXT: csel x23, xzr, x8, vs
+; CHECK-NEXT: csel x24, xzr, x9, vs
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: csel x9, x26, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: csel x23, xzr, x8, vs
-; CHECK-NEXT: csel x24, xzr, x9, vs
-; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: mov x4, x21
-; CHECK-NEXT: mov x5, x22
-; CHECK-NEXT: mov x6, x23
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x25, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s0, s10
-; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x26, x8, gt
-; CHECK-NEXT: fcmp s0, s0
-; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: csel x6, xzr, x8, vs
+; CHECK-NEXT: csel x7, xzr, x9, vs
+; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%x = call <4 x i128> @llvm.fptosi.sat.v4f32.v4i128(<4 x float> %f)
ret <4 x i128> %x
@@ -1465,48 +1449,44 @@ define <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset b8, -48
; CHECK-NEXT: .cfi_offset b9, -56
; CHECK-NEXT: .cfi_offset b10, -64
-; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov d0, d8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000
-; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff
; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff
; CHECK-NEXT: fmov d10, x8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: mov d8, v0.d[1]
+; CHECK-NEXT: fcmp d0, d9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x21, x1, lt
-; CHECK-NEXT: fcmp d8, d10
+; CHECK-NEXT: fcmp d0, d10
; CHECK-NEXT: csel x9, x22, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp d8, d8
+; CHECK-NEXT: fcmp d0, d0
+; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: csel x19, xzr, x8, vs
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp d8, d9
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp d0, d9
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x21, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp d0, d10
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x21, x1, lt
+; CHECK-NEXT: fcmp d8, d10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x22, x8, gt
-; CHECK-NEXT: fcmp d0, d0
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: csel x9, x22, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: fcmp d8, d8
; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: csel x2, xzr, x8, vs
+; CHECK-NEXT: csel x3, xzr, x9, vs
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
%x = call <2 x i100> @llvm.fptosi.sat.v2f64.v2i100(<2 x double> %f)
@@ -1531,48 +1511,44 @@ define <2 x i128> @test_signed_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset b8, -48
; CHECK-NEXT: .cfi_offset b9, -56
; CHECK-NEXT: .cfi_offset b10, -64
-; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov d0, d8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
-; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT: fmov d10, x8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: mov d8, v0.d[1]
+; CHECK-NEXT: fcmp d0, d9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x21, x1, lt
-; CHECK-NEXT: fcmp d8, d10
+; CHECK-NEXT: fcmp d0, d10
; CHECK-NEXT: csel x9, x22, x9, gt
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: fcmp d8, d8
+; CHECK-NEXT: fcmp d0, d0
+; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: csel x19, xzr, x8, vs
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp d8, d9
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp d0, d9
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x21, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp d0, d10
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x21, x1, lt
+; CHECK-NEXT: fcmp d8, d10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x22, x8, gt
-; CHECK-NEXT: fcmp d0, d0
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: csel x9, x22, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: fcmp d8, d8
; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
+; CHECK-NEXT: csel x2, xzr, x8, vs
+; CHECK-NEXT: csel x3, xzr, x9, vs
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: ret
%x = call <2 x i128> @llvm.fptosi.sat.v2f64.v2i128(<2 x double> %f)
@@ -1838,9 +1814,8 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: .cfi_offset b9, -88
; CHECK-NEXT: .cfi_offset b10, -96
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v9.2s, #241, lsl #24
@@ -1849,7 +1824,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
@@ -1864,7 +1839,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1878,6 +1853,7 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1890,30 +1866,27 @@ define <4 x i100> @test_signed_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: mov x4, x21
-; CHECK-NEXT: mov x5, x22
-; CHECK-NEXT: mov x6, x23
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x25, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x25, x1, lt
+; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x26, x8, gt
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: csel x9, x26, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: csel x6, xzr, x8, vs
+; CHECK-NEXT: csel x7, xzr, x9, vs
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%x = call <4 x i100> @llvm.fptosi.sat.v4f16.v4i100(<4 x half> %f)
@@ -1945,9 +1918,8 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: .cfi_offset b9, -88
; CHECK-NEXT: .cfi_offset b10, -96
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: movi v9.2s, #255, lsl #24
@@ -1956,7 +1928,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
@@ -1971,7 +1943,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1985,6 +1957,7 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1997,30 +1970,27 @@ define <4 x i128> @test_signed_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: mov x4, x21
-; CHECK-NEXT: mov x5, x22
-; CHECK-NEXT: mov x6, x23
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, x25, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, x25, x1, lt
+; CHECK-NEXT: fcmp s8, s10
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: csinv x9, x9, xzr, le
-; CHECK-NEXT: csel x8, x26, x8, gt
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: csel x9, x26, x9, gt
+; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: fcmp s8, s8
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
-; CHECK-NEXT: csel x9, xzr, x9, vs
-; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: csel x6, xzr, x8, vs
+; CHECK-NEXT: csel x7, xzr, x9, vs
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
%x = call <4 x i128> @llvm.fptosi.sat.v4f16.v4i128(<4 x half> %f)
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
index b03d145..6089d76 100644
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -729,37 +729,33 @@ define <2 x i100> @test_unsigned_v2f32_v2i100(<2 x float> %f) {
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff
; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: mov s8, v0.s[1]
+; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: fcmp s0, s9
+; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csel x19, x21, x9, gt
; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x20
-; CHECK-NEXT: mov x3, x19
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csel x1, x21, x9, gt
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x1, x19
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: csel x3, x21, x9, gt
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: csinv x2, x8, xzr, le
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
%x = call <2 x i100> @llvm.fptoui.sat.v2f32.v2i100(<2 x float> %f)
@@ -780,36 +776,32 @@ define <2 x i128> @test_unsigned_v2f32_v2i128(<2 x float> %f) {
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov s8, v0.s[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: mov s8, v0.s[1]
+; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: fcmp s0, s9
+; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: csinv x19, x9, xzr, le
; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp s0, #0.0
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s0, s9
+; CHECK-NEXT: csel x8, xzr, x1, lt
+; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csinv x1, x9, xzr, le
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: csinv x2, x9, xzr, le
+; CHECK-NEXT: csinv x3, x8, xzr, le
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
%x = call <2 x i128> @llvm.fptoui.sat.v2f32.v2i128(<2 x float> %f)
@@ -935,13 +927,13 @@ define <4 x i64> @test_unsigned_v4f32_v4i64(<4 x float> %f) {
define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i100:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #112
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x30, x25, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x30, x25, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
@@ -952,23 +944,32 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: .cfi_offset w30, -64
; CHECK-NEXT: .cfi_offset b8, -72
; CHECK-NEXT: .cfi_offset b9, -80
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
+; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff
+; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov s8, v0.s[1]
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: csel x8, xzr, x0, lt
+; CHECK-NEXT: csel x9, xzr, x1, lt
+; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: csel x19, x25, x9, gt
+; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: csel x21, x25, x9, gt
+; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: csel x19, x25, x9, gt
-; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov s8, v0.s[1]
@@ -977,40 +978,27 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: csel x21, x25, x9, gt
-; CHECK-NEXT: csinv x22, x8, xzr, le
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x23, x25, x9, gt
; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x20
-; CHECK-NEXT: mov x3, x19
-; CHECK-NEXT: mov x4, x22
-; CHECK-NEXT: mov x5, x21
-; CHECK-NEXT: mov x6, x24
-; CHECK-NEXT: fcmp s0, #0.0
-; CHECK-NEXT: mov x7, x23
-; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp s8, #0.0
+; CHECK-NEXT: mov x2, x22
+; CHECK-NEXT: mov x3, x21
+; CHECK-NEXT: mov x4, x24
+; CHECK-NEXT: mov x5, x23
+; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csel x1, x25, x9, gt
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: add sp, sp, #112
+; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x1, x19
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: csel x7, x25, x9, gt
+; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: csinv x6, x8, xzr, le
+; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
%x = call <4 x i100> @llvm.fptoui.sat.v4f32.v4i100(<4 x float> %f)
ret <4 x i100> %x
@@ -1019,13 +1007,13 @@ define <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-LABEL: test_unsigned_v4f32_v4i128:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #112
-; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 112
+; CHECK-NEXT: sub sp, sp, #96
+; CHECK-NEXT: stp d9, d8, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 96
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
@@ -1035,22 +1023,31 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: .cfi_offset w30, -64
; CHECK-NEXT: .cfi_offset b8, -72
; CHECK-NEXT: .cfi_offset b9, -80
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
+; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov s8, v0.s[1]
-; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: fcmp s0, #0.0
+; CHECK-NEXT: csel x8, xzr, x1, lt
+; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: fmov s0, s8
+; CHECK-NEXT: csinv x19, x9, xzr, le
+; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: fmov s9, w8
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: csinv x21, x9, xzr, le
+; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: csinv x19, x9, xzr, le
-; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov s8, v0.s[1]
@@ -1059,40 +1056,27 @@ define <4 x i128> @test_unsigned_v4f32_v4i128(<4 x float> %f) {
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: csinv x21, x9, xzr, le
-; CHECK-NEXT: csinv x22, x8, xzr, le
+; CHECK-NEXT: csinv x23, x9, xzr, le
+; CHECK-NEXT: csinv x24, x8, xzr, le
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: csinv x23, x9, xzr, le
-; CHECK-NEXT: csinv x24, x8, xzr, le
-; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: mov x4, x21
-; CHECK-NEXT: mov x5, x22
-; CHECK-NEXT: mov x6, x23
-; CHECK-NEXT: fcmp s0, #0.0
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csinv x1, x9, xzr, le
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: add sp, sp, #112
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: csinv x6, x9, xzr, le
+; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: csinv x7, x8, xzr, le
+; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
%x = call <4 x i128> @llvm.fptoui.sat.v4f32.v4i128(<4 x float> %f)
ret <4 x i128> %x
@@ -1261,37 +1245,33 @@ define <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
-; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov d0, d8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl __fixunsdfti
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff
-; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff
; CHECK-NEXT: fmov d9, x8
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: mov d8, v0.d[1]
+; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: fcmp d0, d9
+; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: csel x19, x21, x9, gt
; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x20
-; CHECK-NEXT: mov x3, x19
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: fcmp d0, #0.0
+; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp d0, d9
-; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csel x1, x21, x9, gt
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x1, x19
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: csel x3, x21, x9, gt
; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: csinv x2, x8, xzr, le
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
%x = call <2 x i100> @llvm.fptoui.sat.v2f64.v2i100(<2 x double> %f)
@@ -1311,36 +1291,32 @@ define <2 x i128> @test_unsigned_v2f64_v2i128(<2 x double> %f) {
; CHECK-NEXT: .cfi_offset w30, -32
; CHECK-NEXT: .cfi_offset b8, -40
; CHECK-NEXT: .cfi_offset b9, -48
-; CHECK-NEXT: mov d8, v0.d[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fmov d0, d8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
-; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
; CHECK-NEXT: fmov d9, x8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: mov d8, v0.d[1]
+; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
-; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: fcmp d0, d9
+; CHECK-NEXT: fmov d0, d8
; CHECK-NEXT: csinv x19, x9, xzr, le
; CHECK-NEXT: csinv x20, x8, xzr, le
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: fcmp d0, #0.0
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp d0, d9
+; CHECK-NEXT: csel x8, xzr, x1, lt
+; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csinv x1, x9, xzr, le
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: csinv x2, x9, xzr, le
+; CHECK-NEXT: csinv x3, x8, xzr, le
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
%x = call <2 x i128> @llvm.fptoui.sat.v2f64.v2i128(<2 x double> %f)
@@ -1570,7 +1546,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: .cfi_offset b8, -72
; CHECK-NEXT: .cfi_offset b9, -80
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[2]
+; CHECK-NEXT: mov h1, v0.h[1]
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: fmov s0, s8
@@ -1580,7 +1556,7 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff
-; CHECK-NEXT: mov h0, v0.h[1]
+; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
@@ -1589,9 +1565,8 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
@@ -1600,8 +1575,9 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp s8, #0.0
+; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
@@ -1611,25 +1587,22 @@ define <4 x i100> @test_unsigned_v4f16_v4i100(<4 x half> %f) {
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov x2, x22
-; CHECK-NEXT: mov x3, x21
-; CHECK-NEXT: mov x4, x20
-; CHECK-NEXT: mov x5, x19
-; CHECK-NEXT: mov x6, x24
-; CHECK-NEXT: mov x7, x23
+; CHECK-NEXT: mov x2, x20
+; CHECK-NEXT: mov x3, x19
+; CHECK-NEXT: mov x4, x22
+; CHECK-NEXT: mov x5, x21
; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov x0, x24
+; CHECK-NEXT: mov x1, x23
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: csel x7, x25, x9, gt
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csel x1, x25, x9, gt
-; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: csinv x6, x8, xzr, le
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
%x = call <4 x i100> @llvm.fptoui.sat.v4f16.v4i100(<4 x half> %f)
@@ -1656,16 +1629,15 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: .cfi_offset b8, -72
; CHECK-NEXT: .cfi_offset b9, -80
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: fcvt s8, h1
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
@@ -1676,7 +1648,7 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
@@ -1685,8 +1657,9 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: csinv x22, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fcmp s8, #0.0
+; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
@@ -1696,25 +1669,22 @@ define <4 x i128> @test_unsigned_v4f16_v4i128(<4 x half> %f) {
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov x2, x19
-; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: mov x4, x21
-; CHECK-NEXT: mov x5, x22
-; CHECK-NEXT: mov x6, x23
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
-; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: mov x3, x22
+; CHECK-NEXT: mov x4, x23
+; CHECK-NEXT: mov x5, x24
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: csel x8, xzr, x1, lt
+; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csinv x1, x9, xzr, le
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: csinv x6, x9, xzr, le
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: csinv x7, x8, xzr, le
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
%x = call <4 x i128> @llvm.fptoui.sat.v4f16.v4i128(<4 x half> %f)
diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
index e93fb40..ed84ec2 100644
--- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll
+++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll
@@ -473,52 +473,37 @@ define <33 x i8> @test_ldnp_v33i8(ptr %A) {
define <4 x i65> @test_ldnp_v4i65(ptr %A) {
; CHECK-LABEL: test_ldnp_v4i65:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0, #8]
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ldr x10, [x0, #24]
+; CHECK-NEXT: ldp x8, x9, [x0, #16]
; CHECK-NEXT: ldrb w11, [x0, #32]
-; CHECK-NEXT: and x1, x8, #0x1
-; CHECK-NEXT: extr x2, x9, x8, #1
-; CHECK-NEXT: extr x4, x10, x9, #2
-; CHECK-NEXT: mov.d v0[1], x1
-; CHECK-NEXT: extr x6, x11, x10, #3
-; CHECK-NEXT: ubfx x3, x9, #1, #1
-; CHECK-NEXT: ubfx x5, x10, #2, #1
+; CHECK-NEXT: ldp x0, x10, [x0]
; CHECK-NEXT: ubfx x7, x11, #3, #1
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: extr x4, x9, x8, #2
+; CHECK-NEXT: extr x6, x11, x9, #3
+; CHECK-NEXT: ubfx x3, x8, #1, #1
+; CHECK-NEXT: extr x2, x8, x10, #1
+; CHECK-NEXT: ubfx x5, x9, #2, #1
+; CHECK-NEXT: and x1, x10, #0x1
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_ldnp_v4i65:
; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ldp x10, x9, [x0]
-; CHECK-BE-NEXT: ldrb w8, [x0, #32]
-; CHECK-BE-NEXT: ldp x12, x11, [x0, #16]
-; CHECK-BE-NEXT: lsr x13, x10, #56
-; CHECK-BE-NEXT: orr x7, x8, x11, lsl #8
-; CHECK-BE-NEXT: extr x8, x10, x9, #56
-; CHECK-BE-NEXT: extr x11, x12, x11, #56
-; CHECK-BE-NEXT: lsr x14, x12, #56
-; CHECK-BE-NEXT: extr x15, x9, x12, #56
-; CHECK-BE-NEXT: lsr x10, x10, #59
-; CHECK-BE-NEXT: extr x1, x13, x8, #3
-; CHECK-BE-NEXT: lsr x8, x9, #56
-; CHECK-BE-NEXT: ubfx x12, x12, #57, #1
-; CHECK-BE-NEXT: ubfx x9, x9, #58, #1
-; CHECK-BE-NEXT: extr x5, x14, x11, #1
-; CHECK-BE-NEXT: and x11, x11, #0x1
-; CHECK-BE-NEXT: fmov d0, x10
-; CHECK-BE-NEXT: fmov d2, x12
-; CHECK-BE-NEXT: fmov d3, x11
-; CHECK-BE-NEXT: fmov d1, x9
-; CHECK-BE-NEXT: extr x3, x8, x15, #2
-; CHECK-BE-NEXT: mov v0.d[1], x1
-; CHECK-BE-NEXT: mov v2.d[1], x5
-; CHECK-BE-NEXT: mov v3.d[1], x7
-; CHECK-BE-NEXT: mov v1.d[1], x3
-; CHECK-BE-NEXT: fmov x0, d0
-; CHECK-BE-NEXT: fmov x4, d2
-; CHECK-BE-NEXT: fmov x6, d3
-; CHECK-BE-NEXT: fmov x2, d1
+; CHECK-BE-NEXT: ldp x9, x8, [x0]
+; CHECK-BE-NEXT: ldrb w12, [x0, #32]
+; CHECK-BE-NEXT: ldp x10, x11, [x0, #16]
+; CHECK-BE-NEXT: extr x13, x9, x8, #56
+; CHECK-BE-NEXT: lsr x14, x9, #56
+; CHECK-BE-NEXT: lsr x16, x8, #56
+; CHECK-BE-NEXT: extr x15, x8, x10, #56
+; CHECK-BE-NEXT: orr x7, x12, x11, lsl #8
+; CHECK-BE-NEXT: extr x11, x10, x11, #56
+; CHECK-BE-NEXT: lsr x12, x10, #56
+; CHECK-BE-NEXT: extr x1, x14, x13, #3
+; CHECK-BE-NEXT: lsr x0, x9, #59
+; CHECK-BE-NEXT: ubfx x2, x8, #58, #1
+; CHECK-BE-NEXT: ubfx x4, x10, #57, #1
+; CHECK-BE-NEXT: extr x3, x16, x15, #2
+; CHECK-BE-NEXT: extr x5, x12, x11, #1
+; CHECK-BE-NEXT: and x6, x11, #0x1
; CHECK-BE-NEXT: ret
%lv = load <4 x i65>, ptr %A, align 8, !nontemporal !0
ret <4 x i65> %lv
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index 84179d3..fa0447c 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -483,21 +483,18 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-LABEL: v2i128:
; CHECK: // %bb.0:
+; CHECK-NEXT: adds x8, x0, x4
+; CHECK-NEXT: adcs x9, x1, x5
+; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: eor x11, x10, #0x8000000000000000
+; CHECK-NEXT: csel x0, x10, x8, vs
+; CHECK-NEXT: csel x1, x11, x9, vs
; CHECK-NEXT: adds x8, x2, x6
; CHECK-NEXT: adcs x9, x3, x7
; CHECK-NEXT: asr x10, x9, #63
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
; CHECK-NEXT: csel x2, x10, x8, vs
; CHECK-NEXT: csel x3, x11, x9, vs
-; CHECK-NEXT: adds x8, x0, x4
-; CHECK-NEXT: adcs x9, x1, x5
-; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: csel x8, x10, x8, vs
-; CHECK-NEXT: eor x11, x10, #0x8000000000000000
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: csel x1, x11, x9, vs
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%z = call <2 x i128> @llvm.sadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index aca9e58..d8b2762 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -486,21 +486,18 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-LABEL: v2i128:
; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x0, x4
+; CHECK-NEXT: sbcs x9, x1, x5
+; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: eor x11, x10, #0x8000000000000000
+; CHECK-NEXT: csel x0, x10, x8, vs
+; CHECK-NEXT: csel x1, x11, x9, vs
; CHECK-NEXT: subs x8, x2, x6
; CHECK-NEXT: sbcs x9, x3, x7
; CHECK-NEXT: asr x10, x9, #63
; CHECK-NEXT: eor x11, x10, #0x8000000000000000
; CHECK-NEXT: csel x2, x10, x8, vs
; CHECK-NEXT: csel x3, x11, x9, vs
-; CHECK-NEXT: subs x8, x0, x4
-; CHECK-NEXT: sbcs x9, x1, x5
-; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: csel x8, x10, x8, vs
-; CHECK-NEXT: eor x11, x10, #0x8000000000000000
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: csel x1, x11, x9, vs
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%z = call <2 x i128> @llvm.ssub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 3c6c1f1..afc0d87 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -477,17 +477,14 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-LABEL: v2i128:
; CHECK: // %bb.0:
+; CHECK-NEXT: adds x8, x0, x4
+; CHECK-NEXT: adcs x9, x1, x5
+; CHECK-NEXT: csinv x0, x8, xzr, lo
+; CHECK-NEXT: csinv x1, x9, xzr, lo
; CHECK-NEXT: adds x8, x2, x6
; CHECK-NEXT: adcs x9, x3, x7
; CHECK-NEXT: csinv x2, x8, xzr, lo
; CHECK-NEXT: csinv x3, x9, xzr, lo
-; CHECK-NEXT: adds x8, x0, x4
-; CHECK-NEXT: adcs x9, x1, x5
-; CHECK-NEXT: csinv x8, x8, xzr, lo
-; CHECK-NEXT: csinv x1, x9, xzr, lo
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%z = call <2 x i128> @llvm.uadd.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index 363c12e..dfcbe96 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -475,17 +475,14 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind {
define <2 x i128> @v2i128(<2 x i128> %x, <2 x i128> %y) nounwind {
; CHECK-LABEL: v2i128:
; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x0, x4
+; CHECK-NEXT: sbcs x9, x1, x5
+; CHECK-NEXT: csel x0, xzr, x8, lo
+; CHECK-NEXT: csel x1, xzr, x9, lo
; CHECK-NEXT: subs x8, x2, x6
; CHECK-NEXT: sbcs x9, x3, x7
; CHECK-NEXT: csel x2, xzr, x8, lo
; CHECK-NEXT: csel x3, xzr, x9, lo
-; CHECK-NEXT: subs x8, x0, x4
-; CHECK-NEXT: sbcs x9, x1, x5
-; CHECK-NEXT: csel x8, xzr, x8, lo
-; CHECK-NEXT: csel x1, xzr, x9, lo
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
%z = call <2 x i128> @llvm.usub.sat.v2i128(<2 x i128> %x, <2 x i128> %y)
ret <2 x i128> %z
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
index 570834f..81b6a69 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll
@@ -2318,43 +2318,41 @@ define arm_aapcs_vfpcc <4 x i100> @test_signed_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: strb.w r4, [r9, #49]
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r3, #7
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s17, s22
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: lsrl r0, r1, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
; CHECK-NEXT: vcmp.f32 s17, s22
-; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vcmp.f32 s17, s20
+; CHECK-NEXT: strd r0, r1, [r9, #16]
+; CHECK-NEXT: it lt
+; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: lsrl r0, r1, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s17, s17
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
-; CHECK-NEXT: strd r0, r1, [r9, #16]
+; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsrl r2, r1, #28
; CHECK-NEXT: strb.w r2, [r9, #24]
@@ -3687,268 +3685,257 @@ define arm_aapcs_vfpcc <2 x i100> @test_signed_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: sub sp, #48
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI40_0
-; CHECK-NEXT: vmov r5, r7, d8
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: vmov r9, r3, d0
-; CHECK-NEXT: str r0, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT: str r3, [sp, #32] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: vmov r10, r9, d8
+; CHECK-NEXT: mov r6, r0
+; CHECK-NEXT: vmov r7, r3, d0
+; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: str r7, [sp, #44] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI40_1
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r5, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: vmov r8, r3, d0
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r11, r3
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: bl __fixdfti
; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: strd r1, r0, [sp, #8] @ 8-byte Folded Spill
; CHECK-NEXT: csel r4, r2, r4, ne
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: cmp r5, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: str.w r4, [r10, #8]
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: ldr r4, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: str.w r9, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: str r4, [r6, #8]
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: ldr r5, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: mov r10, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: str.w r8, [sp, #32] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: csel r6, r1, r0, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: movne.w r7, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r4
-; CHECK-NEXT: str r6, [r0, #4]
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r7, [r6, #4]
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r1, r9
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str r6, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: mov r2, r8
; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r10, r8
+; CHECK-NEXT: str.w r11, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: csel r6, r1, r0, ne
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: csel r7, r1, r0, ne
+; CHECK-NEXT: mov r0, r10
+; CHECK-NEXT: mov r1, r9
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
-; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: str r7, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: movne.w r7, #-1
+; CHECK-NEXT: str.w r10, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str.w r9, [sp, #24] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: vmov r9, r8, d9
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: ldr r0, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: str r6, [r0]
-; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: movne r7, #0
+; CHECK-NEXT: str r7, [r6]
+; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr.w r10, [sp, #32] @ 4-byte Reload
; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: mov r3, r11
-; CHECK-NEXT: mov r4, r10
-; CHECK-NEXT: str.w r10, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: mov r5, r11
-; CHECK-NEXT: str.w r11, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r10
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: bl __fixdfti
-; CHECK-NEXT: mov r10, r3
-; CHECK-NEXT: str r0, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: cmp.w r11, #0
-; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
-; CHECK-NEXT: it eq
-; CHECK-NEXT: mvneq r10, #7
+; CHECK-NEXT: strd r2, r3, [sp, #4] @ 8-byte Folded Spill
+; CHECK-NEXT: csel r11, r1, r11, ne
+; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r10, #7
+; CHECK-NEXT: movne.w r11, #-1
+; CHECK-NEXT: bl __aeabi_dcmpun
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r11, #0
+; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: ldr r5, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r6, r10
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: bl __aeabi_dcmpge
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: csel r10, r4, r0, ne
+; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r8
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r10, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r10, #0
; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr r3, [sp, #40] @ 4-byte Reload
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: lsrl r10, r11, #28
; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: mov r11, r0
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: cmp.w r11, #0
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r4, r10
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r11, r4, lsl #4
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: vmov r0, s1
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r5, r6
-; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill
+; CHECK-NEXT: strd r10, r0, [r6, #16]
; CHECK-NEXT: mov r0, r9
+; CHECK-NEXT: ldr.w r11, [sp, #40] @ 4-byte Reload
+; CHECK-NEXT: mov r3, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r11, r1, r0, ne
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r11, #-1
-; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: it eq
+; CHECK-NEXT: mvneq r0, #7
+; CHECK-NEXT: cmp.w r10, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r11, #0
-; CHECK-NEXT: ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r11
-; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: lsrl r4, r1, #28
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: str r1, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: movne r0, #7
; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: mov r3, r6
-; CHECK-NEXT: mov r7, r6
-; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: mov r3, r8
-; CHECK-NEXT: csel r6, r1, r0, ne
-; CHECK-NEXT: mov r0, r9
-; CHECK-NEXT: mov r1, r8
-; CHECK-NEXT: cmp r5, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r6, #-1
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r6, #0
-; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: and r1, r10, #15
-; CHECK-NEXT: ldr r2, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT: orr.w r0, r0, r6, lsl #4
-; CHECK-NEXT: lsrl r6, r1, #28
-; CHECK-NEXT: strd r4, r0, [r2, #16]
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: strb r6, [r2, #24]
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: movne r5, #0
+; CHECK-NEXT: and r1, r5, #15
+; CHECK-NEXT: mov r8, r6
+; CHECK-NEXT: lsrl r4, r1, #28
+; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: strb r4, [r6, #24]
; CHECK-NEXT: ldr r6, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r5, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: ldr r2, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: ldr r3, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r3, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r7
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: it eq
; CHECK-NEXT: mvneq r0, #7
; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #7
; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r5
-; CHECK-NEXT: mov r1, r6
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r1, r5
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r5
; CHECK-NEXT: bl __aeabi_dcmpun
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r4, #0
+; CHECK-NEXT: ldr r1, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: and r0, r4, #15
-; CHECK-NEXT: orr.w r0, r0, r11, lsl #4
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
; CHECK-NEXT: str.w r0, [r8, #12]
; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9}
@@ -5433,7 +5420,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: vcvtb.f32.f16 s21, s19
; CHECK-NEXT: vcvtt.f32.f16 s24, s19
; CHECK-NEXT: vmov r0, s21
@@ -5442,13 +5429,13 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vcvtb.f32.f16 s30, s18
; CHECK-NEXT: vldr s20, .LCPI50_2
; CHECK-NEXT: vmov r8, s24
-; CHECK-NEXT: vmov r4, s26
+; CHECK-NEXT: vmov r9, s26
; CHECK-NEXT: vcvtt.f32.f16 s22, s18
; CHECK-NEXT: vmov r6, s28
-; CHECK-NEXT: vmov r5, s30
+; CHECK-NEXT: vmov r7, s30
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vldr s18, .LCPI50_3
-; CHECK-NEXT: mov r7, r3
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vcmp.f32 s21, s18
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s21, s20
@@ -5464,7 +5451,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s21, s20
-; CHECK-NEXT: str.w r2, [r9, #83]
+; CHECK-NEXT: str.w r2, [r11, #83]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -5476,7 +5463,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #79]
+; CHECK-NEXT: str.w r1, [r11, #79]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s21, s20
@@ -5487,11 +5474,11 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r9, #75]
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: str.w r0, [r11, #75]
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s30, s18
-; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: it lt
@@ -5506,7 +5493,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s30, s20
-; CHECK-NEXT: str.w r2, [r9, #58]
+; CHECK-NEXT: str.w r2, [r11, #58]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -5518,7 +5505,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #54]
+; CHECK-NEXT: str.w r1, [r11, #54]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s30, s20
@@ -5529,7 +5516,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r9, #50]
+; CHECK-NEXT: str.w r0, [r11, #50]
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s28, s18
@@ -5548,7 +5535,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s28, s20
-; CHECK-NEXT: str.w r2, [r9, #33]
+; CHECK-NEXT: str.w r2, [r11, #33]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -5560,7 +5547,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #29]
+; CHECK-NEXT: str.w r1, [r11, #29]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s28, s20
@@ -5571,8 +5558,8 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r9, #25]
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: str.w r0, [r11, #25]
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s26, s18
; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
@@ -5590,7 +5577,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movvs r2, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s26, s20
-; CHECK-NEXT: str.w r2, [r9, #8]
+; CHECK-NEXT: str.w r2, [r11, #8]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -5602,7 +5589,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #4]
+; CHECK-NEXT: str.w r1, [r11, #4]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s26, s20
@@ -5613,7 +5600,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
-; CHECK-NEXT: str.w r0, [r9]
+; CHECK-NEXT: str.w r0, [r11]
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s24, s18
@@ -5633,69 +5620,68 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movvs r6, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r7, #7
+; CHECK-NEXT: mvnlt r5, #7
; CHECK-NEXT: vcmp.f32 s21, s20
-; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r7, #7
+; CHECK-NEXT: movgt r5, #7
; CHECK-NEXT: vcmp.f32 s21, s21
-; CHECK-NEXT: mov r10, r2
+; CHECK-NEXT: mov r8, r2
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r7, #0
-; CHECK-NEXT: and r0, r7, #15
+; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: and r0, r5, #15
; CHECK-NEXT: orr.w r1, r0, r6, lsl #4
; CHECK-NEXT: vmov r0, s22
-; CHECK-NEXT: str.w r1, [r9, #87]
+; CHECK-NEXT: str.w r1, [r11, #87]
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s22, s18
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s22, s22
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r8, #-1
+; CHECK-NEXT: movgt.w r10, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s30, s18
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: movvs.w r10, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r5, #7
+; CHECK-NEXT: mvnlt r7, #7
; CHECK-NEXT: vcmp.f32 s30, s20
-; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s30, s30
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r5, #7
+; CHECK-NEXT: movgt r7, #7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r5, #0
-; CHECK-NEXT: and r0, r5, #15
-; CHECK-NEXT: orr.w r0, r0, r8, lsl #4
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: and r0, r7, #15
+; CHECK-NEXT: orr.w r0, r0, r10, lsl #4
; CHECK-NEXT: vcvtt.f32.f16 s30, s17
-; CHECK-NEXT: str.w r0, [r9, #62]
+; CHECK-NEXT: str.w r0, [r11, #62]
; CHECK-NEXT: vmov r0, s30
-; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s30, s18
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s30, s20
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
; CHECK-NEXT: vcmp.f32 s30, s30
+; CHECK-NEXT: mov r4, r1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
@@ -5715,8 +5701,9 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: and r0, r0, #15
+; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #37]
+; CHECK-NEXT: str.w r0, [r11, #37]
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl __fixsfti
; CHECK-NEXT: vcmp.f32 s16, s18
@@ -5732,26 +5719,26 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it vs
; CHECK-NEXT: movvs r0, #0
; CHECK-NEXT: vcmp.f32 s26, s18
-; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r5, #7
+; CHECK-NEXT: mvnlt r7, #7
; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r5, #7
+; CHECK-NEXT: movgt r7, #7
; CHECK-NEXT: vcmp.f32 s26, s26
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: vcmp.f32 s24, s18
-; CHECK-NEXT: and r5, r5, #15
+; CHECK-NEXT: and r7, r7, #15
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s20
-; CHECK-NEXT: orr.w r5, r5, r0, lsl #4
-; CHECK-NEXT: str.w r5, [r9, #12]
+; CHECK-NEXT: orr.w r7, r7, r0, lsl #4
+; CHECK-NEXT: str.w r7, [r11, #12]
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: b.w .LBB50_3
; CHECK-NEXT: .p2align 2
@@ -5766,181 +5753,176 @@ define arm_aapcs_vfpcc <8 x i100> @test_signed_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: .LBB50_3:
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s18
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs.w r11, #0
+; CHECK-NEXT: movvs.w r9, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r10, #0
+; CHECK-NEXT: movlt.w r8, #0
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: lsrl r6, r11, #28
+; CHECK-NEXT: lsrl r6, r9, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r10, #-1
+; CHECK-NEXT: movgt.w r8, #-1
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs.w r10, #0
-; CHECK-NEXT: orr.w r5, r11, r10, lsl #4
-; CHECK-NEXT: str.w r5, [r9, #95]
-; CHECK-NEXT: str.w r6, [r9, #91]
+; CHECK-NEXT: movvs.w r8, #0
+; CHECK-NEXT: orr.w r7, r9, r8, lsl #4
+; CHECK-NEXT: str.w r7, [r11, #95]
+; CHECK-NEXT: str.w r6, [r11, #91]
; CHECK-NEXT: vcmp.f32 s24, s18
-; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r6, #7
+; CHECK-NEXT: mvnlt r7, #7
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r6, #7
+; CHECK-NEXT: movgt r7, #7
; CHECK-NEXT: vcmp.f32 s24, s24
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r6, #0
-; CHECK-NEXT: and r5, r6, #15
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: and r7, r7, #15
; CHECK-NEXT: vcmp.f32 s22, s18
-; CHECK-NEXT: lsrl r10, r5, #28
+; CHECK-NEXT: lsrl r8, r7, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: strb.w r10, [r9, #99]
+; CHECK-NEXT: strb.w r8, [r11, #99]
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r7, #7
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s22, s22
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r7, #7
+; CHECK-NEXT: movgt.w r5, #-1
+; CHECK-NEXT: vcmp.f32 s22, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s22, s18
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: vcmp.f32 s22, s18
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vcmp.f32 s22, s20
+; CHECK-NEXT: lsrl r10, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: movgt.w r6, #-1
; CHECK-NEXT: vcmp.f32 s22, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r4, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r4, r7
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT: str.w r7, [r11, #70]
+; CHECK-NEXT: str.w r10, [r11, #66]
; CHECK-NEXT: vcmp.f32 s22, s18
-; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT: vmov r5, s1
+; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: mvnlt r7, #7
; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: lsrl r8, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: movgt r7, #7
; CHECK-NEXT: vcmp.f32 s22, s22
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r4, #0
-; CHECK-NEXT: orr.w r6, r5, r4, lsl #4
+; CHECK-NEXT: movvs r7, #0
; CHECK-NEXT: and r5, r7, #15
-; CHECK-NEXT: lsrl r4, r5, #28
-; CHECK-NEXT: str.w r6, [r9, #70]
-; CHECK-NEXT: str.w r8, [r9, #66]
; CHECK-NEXT: vcmp.f32 s30, s18
-; CHECK-NEXT: strb.w r4, [r9, #74]
+; CHECK-NEXT: lsrl r6, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ldr r4, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: vcmp.f32 s30, s20
+; CHECK-NEXT: mov r5, r4
+; CHECK-NEXT: strb.w r6, [r11, #74]
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r4, #7
+; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r4, #7
+; CHECK-NEXT: movgt.w r5, #-1
; CHECK-NEXT: vcmp.f32 s30, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r4, #0
+; CHECK-NEXT: movvs r5, #0
+; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
; CHECK-NEXT: vcmp.f32 s30, s18
-; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: lsrl r4, r5, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r7, #-1
+; CHECK-NEXT: movgt.w r6, #-1
; CHECK-NEXT: vcmp.f32 s30, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r7, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r4
+; CHECK-NEXT: movvs r6, #0
+; CHECK-NEXT: orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT: str.w r7, [r11, #45]
+; CHECK-NEXT: str.w r4, [r11, #41]
; CHECK-NEXT: vcmp.f32 s30, s18
-; CHECK-NEXT: ldr.w r12, [sp] @ 4-byte Reload
-; CHECK-NEXT: vmov r5, s1
-; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: mvnlt r7, #7
; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: lsrl r12, r5, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: movgt r7, #7
; CHECK-NEXT: vcmp.f32 s30, s30
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r6, #0
-; CHECK-NEXT: orr.w r7, r5, r6, lsl #4
-; CHECK-NEXT: and r5, r4, #15
+; CHECK-NEXT: movvs r7, #0
+; CHECK-NEXT: and r5, r7, #15
; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: lsrl r6, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: str.w r7, [r9, #45]
-; CHECK-NEXT: str.w r12, [r9, #41]
-; CHECK-NEXT: strb.w r6, [r9, #49]
+; CHECK-NEXT: strb.w r6, [r11, #49]
; CHECK-NEXT: it lt
-; CHECK-NEXT: mvnlt r3, #7
+; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #7
+; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s16, s18
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r3, #0
+; CHECK-NEXT: movvs r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r1, #0
+; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: lsrl r0, r1, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r1, #-1
+; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r1, #0
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
+; CHECK-NEXT: movvs r2, #0
+; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
; CHECK-NEXT: vcmp.f32 s16, s18
-; CHECK-NEXT: vmov r1, s1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vcmp.f32 s16, s20
+; CHECK-NEXT: strd r0, r1, [r11, #16]
+; CHECK-NEXT: it lt
+; CHECK-NEXT: mvnlt r3, #7
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: lsrl r0, r1, #28
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r2, #-1
+; CHECK-NEXT: movgt r3, #7
; CHECK-NEXT: vcmp.f32 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it vs
-; CHECK-NEXT: movvs r2, #0
-; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
-; CHECK-NEXT: strd r0, r1, [r9, #16]
+; CHECK-NEXT: movvs r3, #0
; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb.w r2, [r9, #24]
+; CHECK-NEXT: strb.w r2, [r11, #24]
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
index 2b6d0da..5ab184a 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll
@@ -1879,26 +1879,16 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: vcmp.f32 s17, #0
; CHECK-NEXT: lsrl r6, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: strb.w r6, [r8, #49]
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s17, #0
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: vcmp.f32 s17, #0
-; CHECK-NEXT: vmov r1, s1
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vcmp.f32 s17, s20
@@ -1906,7 +1896,15 @@ define arm_aapcs_vfpcc <4 x i100> @test_unsigned_v4f32_v4i100(<4 x float> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
+; CHECK-NEXT: vcmp.f32 s17, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s17, s20
; CHECK-NEXT: strd r0, r1, [r8, #16]
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r3, #15
; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsrl r2, r1, #28
; CHECK-NEXT: strb.w r2, [r8, #24]
@@ -2925,195 +2923,191 @@ define arm_aapcs_vfpcc <2 x i100> @test_unsigned_v2f64_v2i100(<2 x double> %f) {
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vldr d0, .LCPI40_0
; CHECK-NEXT: vmov r6, r5, d8
-; CHECK-NEXT: mov r8, r0
-; CHECK-NEXT: vmov r2, r9, d0
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: vmov r2, r7, d0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: mov r10, r2
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r9, r2
; CHECK-NEXT: bl __aeabi_dcmpgt
; CHECK-NEXT: vldr d0, .LCPI40_1
-; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r11, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: vmov r11, r3, d0
-; CHECK-NEXT: str r3, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: vmov r2, r3, d0
+; CHECK-NEXT: str r2, [sp, #44] @ 4-byte Spill
+; CHECK-NEXT: mov r10, r3
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: str r5, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: strd r1, r0, [sp, #20] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r0, r2, r4, ne
+; CHECK-NEXT: csel r0, r2, r8, ne
; CHECK-NEXT: str r3, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: cmp.w r11, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: str.w r0, [r8, #8]
+; CHECK-NEXT: str r0, [r4, #8]
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: ldr r3, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: mov r0, r6
+; CHECK-NEXT: mov r11, r6
+; CHECK-NEXT: ldr r6, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r7, r6
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r7
; CHECK-NEXT: csel r0, r1, r0, ne
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp.w r8, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: str.w r0, [r8, #4]
-; CHECK-NEXT: mov r0, r7
+; CHECK-NEXT: str r0, [r4, #4]
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r6, r8
-; CHECK-NEXT: strd r8, r7, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT: strd r4, r11, [sp, #28] @ 8-byte Folded Spill
+; CHECK-NEXT: str r5, [sp, #36] @ 4-byte Spill
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r7
-; CHECK-NEXT: ldr r7, [sp, #44] @ 4-byte Reload
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r0, r11
; CHECK-NEXT: mov r1, r5
-; CHECK-NEXT: mov r2, r11
-; CHECK-NEXT: mov r5, r11
-; CHECK-NEXT: mov r3, r7
+; CHECK-NEXT: mov r2, r6
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: vmov r8, r11, d9
; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: csel r0, r1, r0, ne
-; CHECK-NEXT: cmp r4, #0
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: str r0, [r6]
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: str r0, [r4]
+; CHECK-NEXT: ldr r4, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: mov r5, r9
+; CHECK-NEXT: mov r3, r4
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r2, r6
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r5
-; CHECK-NEXT: mov r3, r7
-; CHECK-NEXT: mov r6, r5
-; CHECK-NEXT: str r5, [sp] @ 4-byte Spill
-; CHECK-NEXT: mov r5, r7
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: mov r6, r10
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: mov r7, r0
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: cmp r7, #0
-; CHECK-NEXT: strd r0, r2, [sp, #20] @ 8-byte Folded Spill
-; CHECK-NEXT: csel r0, r3, r7, ne
-; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne r0, #15
-; CHECK-NEXT: mov r7, r0
-; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: add.w r12, sp, #16
+; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: stm.w r12, {r0, r2, r3} @ 12-byte Folded Spill
+; CHECK-NEXT: csel r9, r1, r10, ne
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r4
+; CHECK-NEXT: cmp r7, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r9, #-1
+; CHECK-NEXT: mov r7, r5
+; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r10, r4
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr r5, [sp, #44] @ 4-byte Reload
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: mov r3, r6
+; CHECK-NEXT: str r6, [sp, #8] @ 4-byte Spill
+; CHECK-NEXT: mov r2, r5
; CHECK-NEXT: bl __aeabi_dcmpge
-; CHECK-NEXT: ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r10
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: csel r0, r1, r0, ne
-; CHECK-NEXT: cmp r4, #0
-; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: vmov q0[3], q0[1], r0, r7
-; CHECK-NEXT: vmov r0, s1
+; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r4, r1, r0, ne
+; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r6, r10
-; CHECK-NEXT: str.w r10, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: str.w r9, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: it ne
+; CHECK-NEXT: movne.w r4, #-1
; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: str r4, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: lsrl r4, r9, #28
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: ldr r7, [sp] @ 4-byte Reload
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: mov r3, r5
-; CHECK-NEXT: mov r2, r7
+; CHECK-NEXT: mov r2, r5
+; CHECK-NEXT: mov r3, r6
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
-; CHECK-NEXT: mov r2, r6
-; CHECK-NEXT: mov r3, r9
-; CHECK-NEXT: csel r4, r1, r0, ne
-; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: mov r3, r10
+; CHECK-NEXT: csel r6, r1, r0, ne
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r4, #-1
-; CHECK-NEXT: ldr r5, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: mov r10, r4
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: movne.w r6, #-1
+; CHECK-NEXT: ldr r5, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: orr.w r0, r9, r6, lsl #4
; CHECK-NEXT: mov r1, r11
-; CHECK-NEXT: lsrl r10, r5, #28
+; CHECK-NEXT: strd r4, r0, [r5, #16]
+; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: ldr.w r9, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r9
; CHECK-NEXT: bl __aeabi_dcmpgt
+; CHECK-NEXT: mov r4, r0
+; CHECK-NEXT: mov r0, r8
; CHECK-NEXT: mov r1, r11
; CHECK-NEXT: ldr.w r11, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: mov r0, r8
-; CHECK-NEXT: mov r2, r7
-; CHECK-NEXT: mov r9, r7
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: ldr.w r8, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #24] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
+; CHECK-NEXT: mov r2, r9
+; CHECK-NEXT: mov r3, r10
; CHECK-NEXT: csel r0, r1, r0, ne
-; CHECK-NEXT: cmp r6, #0
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: it ne
-; CHECK-NEXT: movne.w r0, #-1
-; CHECK-NEXT: ldr r2, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT: orr.w r1, r5, r0, lsl #4
-; CHECK-NEXT: strd r10, r1, [r2, #16]
-; CHECK-NEXT: mov r8, r2
-; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT: and r1, r1, #15
-; CHECK-NEXT: lsrl r0, r1, #28
-; CHECK-NEXT: strb r0, [r2, #24]
+; CHECK-NEXT: movne r0, #15
+; CHECK-NEXT: and r1, r0, #15
+; CHECK-NEXT: lsrl r6, r1, #28
+; CHECK-NEXT: strb r6, [r5, #24]
; CHECK-NEXT: ldr r6, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT: ldr r7, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT: ldrd r3, r2, [sp, #8] @ 8-byte Folded Reload
+; CHECK-NEXT: ldr r4, [sp, #36] @ 4-byte Reload
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r7
+; CHECK-NEXT: mov r1, r4
; CHECK-NEXT: bl __aeabi_dcmpgt
-; CHECK-NEXT: mov r10, r0
+; CHECK-NEXT: mov r7, r0
; CHECK-NEXT: mov r0, r6
-; CHECK-NEXT: mov r1, r7
-; CHECK-NEXT: mov r2, r9
-; CHECK-NEXT: mov r3, r11
+; CHECK-NEXT: mov r1, r4
+; CHECK-NEXT: mov r2, r11
+; CHECK-NEXT: mov r3, r8
; CHECK-NEXT: bl __aeabi_dcmpge
; CHECK-NEXT: ldr r1, [sp, #40] @ 4-byte Reload
; CHECK-NEXT: cmp r0, #0
; CHECK-NEXT: csel r0, r1, r0, ne
-; CHECK-NEXT: cmp.w r10, #0
+; CHECK-NEXT: cmp r7, #0
; CHECK-NEXT: it ne
; CHECK-NEXT: movne r0, #15
+; CHECK-NEXT: ldr r1, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: and r0, r0, #15
-; CHECK-NEXT: orr.w r0, r0, r4, lsl #4
-; CHECK-NEXT: str.w r0, [r8, #12]
+; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
+; CHECK-NEXT: str r0, [r5, #12]
; CHECK-NEXT: add sp, #48
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: add sp, #4
@@ -4216,7 +4210,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: .pad #32
; CHECK-NEXT: sub sp, #32
; CHECK-NEXT: vmov q4, q0
-; CHECK-NEXT: mov r9, r0
+; CHECK-NEXT: mov r8, r0
; CHECK-NEXT: vcvtb.f32.f16 s30, s19
; CHECK-NEXT: vcvtb.f32.f16 s28, s18
; CHECK-NEXT: vmov r0, s30
@@ -4224,14 +4218,14 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: vcvtb.f32.f16 s24, s16
; CHECK-NEXT: vcvtb.f32.f16 s26, s17
; CHECK-NEXT: vldr s20, .LCPI50_1
-; CHECK-NEXT: vmov r8, s22
-; CHECK-NEXT: vmov r5, s28
+; CHECK-NEXT: vmov r4, s22
+; CHECK-NEXT: vmov r7, s28
; CHECK-NEXT: vcvtt.f32.f16 s18, s18
-; CHECK-NEXT: vmov r4, s24
+; CHECK-NEXT: vmov r9, s24
; CHECK-NEXT: vmov r6, s26
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s30, #0
-; CHECK-NEXT: mov r7, r3
+; CHECK-NEXT: mov r5, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: it lt
@@ -4242,7 +4236,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s30, s20
-; CHECK-NEXT: str.w r2, [r9, #83]
+; CHECK-NEXT: str.w r2, [r8, #83]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -4250,18 +4244,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #79]
+; CHECK-NEXT: str.w r1, [r8, #79]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str.w r0, [r9, #75]
-; CHECK-NEXT: mov r0, r5
+; CHECK-NEXT: str.w r0, [r8, #75]
+; CHECK-NEXT: mov r0, r7
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s28, #0
-; CHECK-NEXT: mov r5, r3
+; CHECK-NEXT: mov r7, r3
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: it lt
@@ -4272,7 +4266,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s28, s20
-; CHECK-NEXT: str.w r2, [r9, #58]
+; CHECK-NEXT: str.w r2, [r8, #58]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -4280,14 +4274,14 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #54]
+; CHECK-NEXT: str.w r1, [r8, #54]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str.w r0, [r9, #50]
+; CHECK-NEXT: str.w r0, [r8, #50]
; CHECK-NEXT: mov r0, r6
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s26, #0
@@ -4302,7 +4296,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s26, s20
-; CHECK-NEXT: str.w r2, [r9, #33]
+; CHECK-NEXT: str.w r2, [r8, #33]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -4310,18 +4304,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #29]
+; CHECK-NEXT: str.w r1, [r8, #29]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str.w r0, [r9, #25]
-; CHECK-NEXT: mov r0, r4
+; CHECK-NEXT: str.w r0, [r8, #25]
+; CHECK-NEXT: mov r0, r9
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s24, #0
-; CHECK-NEXT: mov r4, r3
+; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: it lt
@@ -4332,7 +4326,7 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s24, s20
-; CHECK-NEXT: str.w r2, [r9, #8]
+; CHECK-NEXT: str.w r2, [r8, #8]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -4340,21 +4334,21 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r1, [r9, #4]
+; CHECK-NEXT: str.w r1, [r8, #4]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
-; CHECK-NEXT: str.w r0, [r9]
-; CHECK-NEXT: mov r0, r8
+; CHECK-NEXT: str.w r0, [r8]
+; CHECK-NEXT: mov r0, r4
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s22, #0
; CHECK-NEXT: mov r6, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s22, s20
-; CHECK-NEXT: str r3, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
@@ -4363,177 +4357,174 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: movgt.w r6, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: vcmp.f32 s30, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r7, #15
-; CHECK-NEXT: and r0, r7, #15
-; CHECK-NEXT: mov r11, r1
+; CHECK-NEXT: movgt r5, #15
+; CHECK-NEXT: and r0, r5, #15
+; CHECK-NEXT: mov r9, r1
; CHECK-NEXT: orr.w r1, r0, r6, lsl #4
; CHECK-NEXT: vmov r0, s18
-; CHECK-NEXT: mov r10, r2
-; CHECK-NEXT: str.w r1, [r9, #87]
+; CHECK-NEXT: mov r4, r2
+; CHECK-NEXT: str.w r1, [r8, #87]
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: mov r8, r0
+; CHECK-NEXT: mov r10, r0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s18, s20
-; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill
-; CHECK-NEXT: mov r7, r3
-; CHECK-NEXT: str r2, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: mov r5, r1
+; CHECK-NEXT: str r3, [sp, #20] @ 4-byte Spill
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r8, #0
+; CHECK-NEXT: movlt.w r10, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s28, #0
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r8, #-1
+; CHECK-NEXT: movgt.w r10, #-1
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r5, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r5, #15
-; CHECK-NEXT: and r0, r5, #15
+; CHECK-NEXT: movgt r7, #15
+; CHECK-NEXT: and r0, r7, #15
; CHECK-NEXT: vcvtt.f32.f16 s28, s17
-; CHECK-NEXT: orr.w r0, r0, r8, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #62]
+; CHECK-NEXT: orr.w r0, r0, r10, lsl #4
+; CHECK-NEXT: str.w r0, [r8, #62]
; CHECK-NEXT: vmov r0, s28
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s28, #0
-; CHECK-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT: str r2, [sp, #16] @ 4-byte Spill
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str r2, [sp, #28] @ 4-byte Spill
-; CHECK-NEXT: str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: str r3, [sp, #28] @ 4-byte Spill
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
; CHECK-NEXT: vcmp.f32 s28, s20
-; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: mov r11, r1
; CHECK-NEXT: vcmp.f32 s26, #0
; CHECK-NEXT: mov r1, r0
; CHECK-NEXT: str r0, [sp] @ 4-byte Spill
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT: vcmp.f32 s26, s20
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: vcmp.f32 s26, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt r0, #15
; CHECK-NEXT: and r0, r0, #15
+; CHECK-NEXT: vcvtt.f32.f16 s16, s16
; CHECK-NEXT: orr.w r0, r0, r1, lsl #4
-; CHECK-NEXT: str.w r0, [r9, #37]
+; CHECK-NEXT: str.w r0, [r8, #37]
; CHECK-NEXT: vmov r0, s16
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r0, #0
+; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s24, #0
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r0, #-1
+; CHECK-NEXT: vcmp.f32 s24, #0
+; CHECK-NEXT: ldr r7, [sp, #12] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s24, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r4, #15
-; CHECK-NEXT: and r5, r4, #15
+; CHECK-NEXT: movgt r7, #15
+; CHECK-NEXT: and r7, r7, #15
; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: orr.w r5, r5, r0, lsl #4
+; CHECK-NEXT: orr.w r7, r7, r0, lsl #4
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: str.w r5, [r9, #12]
+; CHECK-NEXT: str.w r7, [r8, #12]
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: movlt.w r9, #0
; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: movgt.w r9, #-1
; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: lsrl r6, r11, #28
+; CHECK-NEXT: lsrl r6, r9, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt.w r10, #0
+; CHECK-NEXT: movlt r4, #0
; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r10, #-1
-; CHECK-NEXT: orr.w r5, r11, r10, lsl #4
-; CHECK-NEXT: str.w r5, [r9, #95]
-; CHECK-NEXT: str.w r6, [r9, #91]
+; CHECK-NEXT: movgt.w r4, #-1
+; CHECK-NEXT: orr.w r7, r9, r4, lsl #4
+; CHECK-NEXT: str.w r7, [r8, #95]
+; CHECK-NEXT: str.w r6, [r8, #91]
; CHECK-NEXT: vcmp.f32 s22, #0
-; CHECK-NEXT: ldr r6, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #8] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s22, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r6, #15
-; CHECK-NEXT: and r5, r6, #15
+; CHECK-NEXT: movgt r7, #15
+; CHECK-NEXT: and r7, r7, #15
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: lsrl r10, r5, #28
+; CHECK-NEXT: lsrl r4, r7, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: strb.w r10, [r9, #99]
+; CHECK-NEXT: strb.w r4, [r8, #99]
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r5, #0
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r7, #15
+; CHECK-NEXT: movgt.w r5, #-1
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: ldr r6, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: lsrl r10, r5, #28
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: vmov q0[3], q0[1], r6, r7
+; CHECK-NEXT: orr.w r7, r5, r6, lsl #4
+; CHECK-NEXT: str.w r7, [r8, #70]
+; CHECK-NEXT: str.w r10, [r8, #66]
; CHECK-NEXT: vcmp.f32 s18, #0
-; CHECK-NEXT: vmov r5, s1
-; CHECK-NEXT: ldr r4, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT: ldr r7, [sp, #20] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: lsrl r8, r5, #28
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r4, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s18, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r4, #-1
-; CHECK-NEXT: orr.w r6, r5, r4, lsl #4
+; CHECK-NEXT: movgt r7, #15
; CHECK-NEXT: and r5, r7, #15
-; CHECK-NEXT: lsrl r4, r5, #28
-; CHECK-NEXT: str.w r6, [r9, #70]
-; CHECK-NEXT: str.w r8, [r9, #66]
; CHECK-NEXT: vcmp.f32 s28, #0
-; CHECK-NEXT: strb.w r4, [r9, #74]
+; CHECK-NEXT: lsrl r6, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: vcmp.f32 s28, s20
+; CHECK-NEXT: strb.w r6, [r8, #74]
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt.w r11, #0
+; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r7, #15
-; CHECK-NEXT: mov r12, r7
+; CHECK-NEXT: movgt.w r11, #-1
+; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
; CHECK-NEXT: vcmp.f32 s28, #0
-; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT: ldr r6, [sp, #16] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: lsrl r4, r11, #28
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r7, #0
+; CHECK-NEXT: movlt r6, #0
; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r7, #-1
; CHECK-NEXT: b.w .LBB50_2
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
@@ -4541,46 +4532,34 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: .long 0x717fffff @ float 1.26765052E+30
; CHECK-NEXT: .p2align 1
; CHECK-NEXT: .LBB50_2:
-; CHECK-NEXT: vmov q0[3], q0[1], r7, r12
-; CHECK-NEXT: ldr r4, [sp] @ 4-byte Reload
-; CHECK-NEXT: vmov r5, s1
-; CHECK-NEXT: ldr r6, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt.w r6, #-1
+; CHECK-NEXT: orr.w r7, r11, r6, lsl #4
+; CHECK-NEXT: str.w r7, [r8, #45]
+; CHECK-NEXT: str.w r4, [r8, #41]
; CHECK-NEXT: vcmp.f32 s28, #0
-; CHECK-NEXT: lsrl r4, r5, #28
+; CHECK-NEXT: ldr r7, [sp, #28] @ 4-byte Reload
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r6, #0
+; CHECK-NEXT: movlt r7, #0
; CHECK-NEXT: vcmp.f32 s28, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt.w r6, #-1
-; CHECK-NEXT: orr.w r7, r5, r6, lsl #4
-; CHECK-NEXT: and r5, r12, #15
+; CHECK-NEXT: movgt r7, #15
+; CHECK-NEXT: and r5, r7, #15
; CHECK-NEXT: vcmp.f32 s16, #0
; CHECK-NEXT: lsrl r6, r5, #28
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s16, s20
-; CHECK-NEXT: str.w r7, [r9, #45]
-; CHECK-NEXT: str.w r4, [r9, #41]
-; CHECK-NEXT: strb.w r6, [r9, #49]
-; CHECK-NEXT: it lt
-; CHECK-NEXT: movlt r3, #0
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
-; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: it gt
-; CHECK-NEXT: movgt r3, #15
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: strb.w r6, [r8, #49]
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r1, #0
; CHECK-NEXT: vcmp.f32 s16, s20
; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r1, #-1
-; CHECK-NEXT: vmov q0[3], q0[1], r1, r3
; CHECK-NEXT: vcmp.f32 s16, #0
-; CHECK-NEXT: vmov r1, s1
-; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: lsrl r0, r1, #28
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
; CHECK-NEXT: it lt
; CHECK-NEXT: movlt r2, #0
; CHECK-NEXT: vcmp.f32 s16, s20
@@ -4588,10 +4567,18 @@ define arm_aapcs_vfpcc <8 x i100> @test_unsigned_v8f16_v8i100(<8 x half> %f) {
; CHECK-NEXT: it gt
; CHECK-NEXT: movgt.w r2, #-1
; CHECK-NEXT: orr.w r1, r1, r2, lsl #4
-; CHECK-NEXT: strd r0, r1, [r9, #16]
+; CHECK-NEXT: vcmp.f32 s16, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: vcmp.f32 s16, s20
+; CHECK-NEXT: strd r0, r1, [r8, #16]
+; CHECK-NEXT: it lt
+; CHECK-NEXT: movlt r3, #0
+; CHECK-NEXT: vmrs APSR_nzcv, fpscr
+; CHECK-NEXT: it gt
+; CHECK-NEXT: movgt r3, #15
; CHECK-NEXT: and r1, r3, #15
; CHECK-NEXT: lsrl r2, r1, #28
-; CHECK-NEXT: strb.w r2, [r9, #24]
+; CHECK-NEXT: strb.w r2, [r8, #24]
; CHECK-NEXT: add sp, #32
; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
; CHECK-NEXT: add sp, #4
diff --git a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
index f2c8440..55a621e 100644
--- a/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-minmaxi.ll
@@ -207,10 +207,8 @@ define arm_aapcs_vfpcc <1 x i64> @smax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sbcs.w r12, r3, r1
; CHECK-NEXT: cset r12, lt
; CHECK-NEXT: cmp.w r12, #0
-; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.smax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -486,10 +484,8 @@ define arm_aapcs_vfpcc <1 x i64> @umax1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sbcs.w r12, r3, r1
; CHECK-NEXT: cset r12, lo
; CHECK-NEXT: cmp.w r12, #0
-; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.umax.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -772,10 +768,8 @@ define arm_aapcs_vfpcc <1 x i64> @smin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sbcs.w r12, r1, r3
; CHECK-NEXT: cset r12, lt
; CHECK-NEXT: cmp.w r12, #0
-; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.smin.v1i64(<1 x i64> %a, <1 x i64> %b)
@@ -1051,10 +1045,8 @@ define arm_aapcs_vfpcc <1 x i64> @umin1i64(<1 x i64> %a, <1 x i64> %b) {
; CHECK-NEXT: sbcs.w r12, r1, r3
; CHECK-NEXT: cset r12, lo
; CHECK-NEXT: cmp.w r12, #0
-; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: csel r0, r0, r2, ne
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
-; CHECK-NEXT: vmov r0, s0
+; CHECK-NEXT: csel r1, r1, r3, ne
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: bx lr
%c = call <1 x i64> @llvm.umin.v1i64(<1 x i64> %a, <1 x i64> %b)
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
index d80dd5a..85317e1 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll
@@ -535,22 +535,20 @@ define void @vst3_v2i8(ptr %src, ptr %dst) {
; CHECK-NEXT: .pad #16
; CHECK-NEXT: sub sp, #16
; CHECK-NEXT: ldrb r2, [r0]
-; CHECK-NEXT: mov r4, sp
-; CHECK-NEXT: ldrb r3, [r0, #1]
-; CHECK-NEXT: ldrb.w r12, [r0, #2]
-; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
-; CHECK-NEXT: ldrb.w lr, [r0, #3]
-; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: ldrb r5, [r0, #5]
+; CHECK-NEXT: mov r5, sp
+; CHECK-NEXT: ldrb r3, [r0, #2]
; CHECK-NEXT: vmov.16 q0[0], r2
+; CHECK-NEXT: ldrb.w r12, [r0, #1]
+; CHECK-NEXT: ldrb.w lr, [r0, #3]
+; CHECK-NEXT: vmov.16 q0[1], r3
+; CHECK-NEXT: ldrb r4, [r0, #5]
; CHECK-NEXT: ldrb r0, [r0, #4]
-; CHECK-NEXT: vmov.16 q0[1], r12
; CHECK-NEXT: vmov.16 q0[2], r0
; CHECK-NEXT: add r0, sp, #8
-; CHECK-NEXT: vmov.16 q0[3], r3
+; CHECK-NEXT: vmov.16 q0[3], r12
; CHECK-NEXT: vmov.16 q0[4], lr
-; CHECK-NEXT: vmov.16 q0[5], r5
-; CHECK-NEXT: vstrb.16 q0, [r4]
+; CHECK-NEXT: vmov.16 q0[5], r4
+; CHECK-NEXT: vstrb.16 q0, [r5]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: ldr r2, [sp]
diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
index f3a65c4..b369044 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll
@@ -238,27 +238,23 @@ define void @vst4_v2i16(ptr %src, ptr %dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: ldrh r3, [r0, #2]
-; CHECK-NEXT: ldrh r2, [r0]
-; CHECK-NEXT: ldrh.w r12, [r0, #10]
-; CHECK-NEXT: ldrh.w lr, [r0, #4]
-; CHECK-NEXT: vmov q1[2], q1[0], r2, r3
-; CHECK-NEXT: ldrh r4, [r0, #12]
-; CHECK-NEXT: ldrh r5, [r0, #6]
+; CHECK-NEXT: ldrh r2, [r0, #4]
+; CHECK-NEXT: ldrh r3, [r0, #8]
+; CHECK-NEXT: ldrh.w r12, [r0, #12]
+; CHECK-NEXT: ldrh.w lr, [r0, #2]
+; CHECK-NEXT: ldrh r4, [r0, #6]
+; CHECK-NEXT: ldrh r5, [r0, #10]
; CHECK-NEXT: ldrh r6, [r0, #14]
-; CHECK-NEXT: ldrh r0, [r0, #8]
-; CHECK-NEXT: vmov q0[2], q0[0], r0, r12
-; CHECK-NEXT: vmov r0, s4
-; CHECK-NEXT: vmov.16 q1[0], r0
-; CHECK-NEXT: vmov r0, s0
-; CHECK-NEXT: vmov.16 q1[1], lr
-; CHECK-NEXT: vmov.16 q1[2], r0
-; CHECK-NEXT: vmov.16 q1[3], r4
-; CHECK-NEXT: vmov.16 q1[4], r3
-; CHECK-NEXT: vmov.16 q1[5], r5
-; CHECK-NEXT: vmov.16 q1[6], r12
-; CHECK-NEXT: vmov.16 q1[7], r6
-; CHECK-NEXT: vstrh.16 q1, [r1]
+; CHECK-NEXT: ldrh r0, [r0]
+; CHECK-NEXT: vmov.16 q0[0], r0
+; CHECK-NEXT: vmov.16 q0[1], r2
+; CHECK-NEXT: vmov.16 q0[2], r3
+; CHECK-NEXT: vmov.16 q0[3], r12
+; CHECK-NEXT: vmov.16 q0[4], lr
+; CHECK-NEXT: vmov.16 q0[5], r4
+; CHECK-NEXT: vmov.16 q0[6], r5
+; CHECK-NEXT: vmov.16 q0[7], r6
+; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry:
%l1 = load <2 x i16>, ptr %src, align 4
@@ -475,26 +471,22 @@ define void @vst4_v2i8(ptr %src, ptr %dst) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, lr}
; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: ldrb r4, [r0, #5]
-; CHECK-NEXT: ldrb r5, [r0, #4]
-; CHECK-NEXT: ldrb r2, [r0]
-; CHECK-NEXT: ldrb r3, [r0, #1]
-; CHECK-NEXT: vmov q0[2], q0[0], r5, r4
-; CHECK-NEXT: vmov r5, s0
-; CHECK-NEXT: ldrb.w r12, [r0, #2]
-; CHECK-NEXT: vmov q0[2], q0[0], r2, r3
-; CHECK-NEXT: ldrb.w lr, [r0, #3]
-; CHECK-NEXT: vmov r2, s0
-; CHECK-NEXT: ldrb r6, [r0, #7]
-; CHECK-NEXT: vmov.16 q0[0], r2
-; CHECK-NEXT: ldrb r0, [r0, #6]
-; CHECK-NEXT: vmov.16 q0[1], r12
-; CHECK-NEXT: vmov.16 q0[2], r5
-; CHECK-NEXT: vmov.16 q0[3], r0
-; CHECK-NEXT: vmov.16 q0[4], r3
-; CHECK-NEXT: vmov.16 q0[5], lr
-; CHECK-NEXT: vmov.16 q0[6], r4
-; CHECK-NEXT: vmov.16 q0[7], r6
+; CHECK-NEXT: ldrb r4, [r0]
+; CHECK-NEXT: ldrb r6, [r0, #2]
+; CHECK-NEXT: vmov.16 q0[0], r4
+; CHECK-NEXT: ldrb r2, [r0, #4]
+; CHECK-NEXT: vmov.16 q0[1], r6
+; CHECK-NEXT: ldrb r3, [r0, #6]
+; CHECK-NEXT: vmov.16 q0[2], r2
+; CHECK-NEXT: ldrb r5, [r0, #1]
+; CHECK-NEXT: vmov.16 q0[3], r3
+; CHECK-NEXT: ldrb.w r12, [r0, #5]
+; CHECK-NEXT: ldrb.w lr, [r0, #7]
+; CHECK-NEXT: vmov.16 q0[4], r5
+; CHECK-NEXT: ldrb r0, [r0, #3]
+; CHECK-NEXT: vmov.16 q0[5], r0
+; CHECK-NEXT: vmov.16 q0[6], r12
+; CHECK-NEXT: vmov.16 q0[7], lr
; CHECK-NEXT: vstrb.16 q0, [r1]
; CHECK-NEXT: pop {r4, r5, r6, pc}
entry: