aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp39
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll29
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-tbl.ll15
-rw-r--r--llvm/test/CodeGen/AArch64/const-vector-big-endian.ll27
-rw-r--r--llvm/test/CodeGen/AArch64/constant-pool-partition.ll28
-rw-r--r--llvm/test/CodeGen/AArch64/extract-vector-elt.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/fcvt_combine.ll80
-rw-r--r--llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/neon-abd.ll9
-rw-r--r--llvm/test/CodeGen/AArch64/neon-mov.ll10
-rw-r--r--llvm/test/CodeGen/AArch64/neon-stepvector.ll7
-rw-r--r--llvm/test/CodeGen/AArch64/pr58350.ll12
-rw-r--r--llvm/test/CodeGen/AArch64/shuffle-tbl34.ll60
-rw-r--r--llvm/test/CodeGen/AArch64/shuffles.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/srem-vector-lkk.ll38
-rw-r--r--llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll13
-rw-r--r--llvm/test/CodeGen/AArch64/urem-vector-lkk.ll28
-rw-r--r--llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll8
-rw-r--r--llvm/test/CodeGen/AArch64/vector-extract-last-active.ll8
21 files changed, 280 insertions, 171 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 713b40d..22418e6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -16379,6 +16379,45 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Val;
}
+ // Handle 64-bit constant BUILD_VECTORs by packing them into an i64 immediate.
+ // This is cheaper than a load if the immediate can be materialized in a few
+ // mov instructions. This optimization is disabled for big-endian targets for
+ // now.
+ if (BVN->isConstant() && VT.isFixedLengthVector() &&
+ VT.getSizeInBits() == 64 && !DAG.getDataLayout().isBigEndian()) {
+ const SDLoc DL(Op);
+ APInt PackedVal(64, 0);
+ unsigned BitPos = 0;
+
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ for (unsigned i = 0, e = BVN->getNumOperands(); i != e; ++i) {
+ const SDValue &LaneOp = BVN->getOperand(i);
+ APInt LaneBits;
+ if (LaneOp.getOpcode() == ISD::UNDEF)
+ LaneBits = APInt(EltSizeInBits, 0);
+ else if (auto *C = dyn_cast<ConstantSDNode>(LaneOp))
+ LaneBits = C->getAPIntValue();
+ else if (auto *CFP = dyn_cast<ConstantFPSDNode>(LaneOp))
+ LaneBits = CFP->getValueAPF().bitcastToAPInt();
+ else
+ return SDValue();
+
+ PackedVal |= LaneBits.trunc(VT.getScalarSizeInBits()).zext(64) << BitPos;
+ BitPos += EltSizeInBits;
+ }
+
+  // Only use this optimization when the packed constant can be
+  // materialized in at most two mov/movk instructions.
+ SmallVector<AArch64_IMM::ImmInsnModel, 4> Insns;
+ AArch64_IMM::expandMOVImm(PackedVal.getZExtValue(), 64, Insns);
+ if (Insns.size() > 2)
+ return SDValue();
+
+ SDValue ScalarConst = DAG.getConstant(PackedVal, DL, MVT::i64);
+ // Use BITCAST to reinterpret the scalar constant's bits as a vector.
+ return DAG.getNode(ISD::BITCAST, DL, VT, ScalarConst);
+ }
+
// This will generate a load from the constant pool.
if (isConstant) {
LLVM_DEBUG(
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
index 7483eab..34499c6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll
@@ -140,12 +140,20 @@ entry:
}
define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) {
-; CHECK-LABEL: test_ld_from_poll_v2i32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_ld_from_poll_v2i32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, #1 // =0x1
+; CHECK-SD-NEXT: movk x8, #2, lsl #32
+; CHECK-SD-NEXT: fmov d1, x8
+; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_ld_from_poll_v2i32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
entry:
%b = add <2 x i32> %a, <i32 1, i32 2>
ret <2 x i32> %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
index 0e1e15f..e80afe2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll
@@ -684,15 +684,26 @@ define void @testLeftShouldNotCreateSLI1x128(<1 x i128> %src1, <1 x i128> %src2,
}
define void @testLeftNotAllConstantBuildVec8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind {
-; CHECK-LABEL: testLeftNotAllConstantBuildVec8x8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI29_0
-; CHECK-NEXT: shl.8b v1, v1, #3
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI29_0]
-; CHECK-NEXT: and.8b v0, v0, v2
-; CHECK-NEXT: orr.8b v0, v0, v1
-; CHECK-NEXT: str d0, [x0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: testLeftNotAllConstantBuildVec8x8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: mov x8, #506381209866536711 // =0x707070707070707
+; CHECK-SD-NEXT: shl.8b v1, v1, #3
+; CHECK-SD-NEXT: orr x8, x8, #0x7f8000007f80000
+; CHECK-SD-NEXT: fmov d2, x8
+; CHECK-SD-NEXT: and.8b v0, v0, v2
+; CHECK-SD-NEXT: orr.8b v0, v0, v1
+; CHECK-SD-NEXT: str d0, [x0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: testLeftNotAllConstantBuildVec8x8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI29_0
+; CHECK-GI-NEXT: shl.8b v1, v1, #3
+; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI29_0]
+; CHECK-GI-NEXT: and.8b v0, v0, v2
+; CHECK-GI-NEXT: orr.8b v0, v0, v1
+; CHECK-GI-NEXT: str d0, [x0]
+; CHECK-GI-NEXT: ret
%and.i = and <8 x i8> %src1, <i8 7, i8 7, i8 255, i8 7, i8 7, i8 7, i8 255, i8 7>
%vshl_n = shl <8 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3>
%result = or <8 x i8> %and.i, %vshl_n
diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
index fe5a6f1..27694bd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll
@@ -140,16 +140,6 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
ret <16 x i8> %tmp3
}
-; CHECK-SD-LABEL: .LCPI8_0:
-; CHECK-SD: .byte 0 // 0x0
-; CHECK-SD-NEXT: .byte 4 // 0x4
-; CHECK-SD-NEXT: .byte 8 // 0x8
-; CHECK-SD-NEXT: .byte 12 // 0xc
-; CHECK-SD-NEXT: .byte 255 // 0xff
-; CHECK-SD-NEXT: .byte 255 // 0xff
-; CHECK-SD-NEXT: .byte 255 // 0xff
-; CHECK-SD-NEXT: .byte 255 // 0xff
-
; CHECK-GI-LABEL: .LCPI8_0:
; CHECK-GI: .byte 0 // 0x0
; CHECK-GI-NEXT: .byte 1 // 0x1
@@ -172,12 +162,13 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %
define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI8_0
+; CHECK-SD-NEXT: mov x8, #-64512 // =0xffffffffffff0400
; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3
-; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0]
+; CHECK-SD-NEXT: movk x8, #3080, lsl #16
; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3
+; CHECK-SD-NEXT: fmov d4, x8
; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4
; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4
; CHECK-SD-NEXT: mov.s v0[1], v1[1]
diff --git a/llvm/test/CodeGen/AArch64/const-vector-big-endian.ll b/llvm/test/CodeGen/AArch64/const-vector-big-endian.ll
new file mode 100644
index 0000000..0b49b7f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/const-vector-big-endian.ll
@@ -0,0 +1,27 @@
+; Verify that constant materialization does not kick in for big-endian targets.
+; RUN: llc -mtriple=aarch64_be-linux-gnu -o - %s | FileCheck %s
+
+define <2 x i32> @test_const_v2i32_big_endian() {
+; CHECK-LABEL: test_const_v2i32_big_endian:
+; CHECK: ldr d0, [x8, :lo12:.LCPI0_0]
+; CHECK-NOT: mov
+; CHECK: ret
+ ret <2 x i32> <i32 1, i32 2>
+}
+
+define <4 x i16> @test_const_v4i16_big_endian() {
+; CHECK-LABEL: test_const_v4i16_big_endian:
+; CHECK: ldr d0, [x8, :lo12:.LCPI1_0]
+; CHECK-NOT: mov
+; CHECK: ret
+ ret <4 x i16> <i16 1, i16 2, i16 3, i16 4>
+}
+
+define <8 x i8> @test_const_v8i8_big_endian() {
+; CHECK-LABEL: test_const_v8i8_big_endian:
+; CHECK: ldr d0, [x8, :lo12:.LCPI2_0]
+; CHECK-NOT: mov
+; CHECK: ret
+ ret <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
+}
+
diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
index 9f4b3e2..7e74ab4 100644
--- a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -30,27 +30,27 @@
; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI0_2:
-; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-NEXT: .byte 1 // 0x1
+; CHECK-NEXT: .byte 2 // 0x2
+; CHECK-NEXT: .byte 3 // 0x3
; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 5 // 0x5
+; CHECK-NEXT: .byte 6 // 0x6
+; CHECK-NEXT: .byte 7 // 0x7
; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
;; Constant pools for function @unprofiled_func
; CHECK: .section .rodata.cst8,"aM",@progbits,8
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI1_0:
-; CHECK-NEXT: .byte 0 // 0x0
+; CHECK-NEXT: .byte 1 // 0x1
+; CHECK-NEXT: .byte 2 // 0x2
+; CHECK-NEXT: .byte 3 // 0x3
; CHECK-NEXT: .byte 4 // 0x4
+; CHECK-NEXT: .byte 5 // 0x5
+; CHECK-NEXT: .byte 6 // 0x6
+; CHECK-NEXT: .byte 7 // 0x7
; CHECK-NEXT: .byte 8 // 0x8
-; CHECK-NEXT: .byte 12 // 0xc
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
-; CHECK-NEXT: .byte 255 // 0xff
; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16
; CHECK-NEXT: .p2align
; CHECK-NEXT: .LCPI1_1:
@@ -94,7 +94,7 @@
define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
%2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
%num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
- %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>)
%t2 = bitcast <8 x i8> %t1 to <2 x i32>
%3 = extractelement <2 x i32> %t2, i32 1
%sum = add i32 %2, %3
@@ -106,7 +106,7 @@ declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
declare i32 @func_taking_arbitrary_param(...)
define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
- %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>)
%t2 = bitcast <8 x i8> %t1 to <4 x i16>
%t3 = zext <4 x i16> %t2 to <4 x i32>
%t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7>
diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
index babb4ed..23bc6e0 100644
--- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll
@@ -1037,10 +1037,11 @@ entry:
define i32 @extract_v4i32_phi(i64 %val, i32 %limit, ptr %ptr) {
; CHECK-SD-LABEL: extract_v4i32_phi:
; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: mov x8, #1 // =0x1
; CHECK-SD-NEXT: dup v1.2s, w0
-; CHECK-SD-NEXT: adrp x8, .LCPI41_0
; CHECK-SD-NEXT: movi v0.2s, #16
-; CHECK-SD-NEXT: ldr d2, [x8, :lo12:.LCPI41_0]
+; CHECK-SD-NEXT: movk x8, #2, lsl #32
+; CHECK-SD-NEXT: fmov d2, x8
; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s
; CHECK-SD-NEXT: .LBB41_1: // %loop
; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
index 38defb6..c190c63 100644
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -141,13 +141,39 @@ define <2 x i32> @test8(<2 x float> %f) {
; Test which should not fold due to non-matching power of 2.
define <2 x i32> @test9(<2 x float> %f) {
-; CHECK-LABEL: test9:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzu v0.2s, v0.2s
-; CHECK-NEXT: ret
+; CHECK-NO16-SD-LABEL: test9:
+; CHECK-NO16-SD: // %bb.0:
+; CHECK-NO16-SD-NEXT: mov x8, #1098907648 // =0x41800000
+; CHECK-NO16-SD-NEXT: movk x8, #16640, lsl #48
+; CHECK-NO16-SD-NEXT: fmov d1, x8
+; CHECK-NO16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NO16-SD-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NO16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: test9:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: mov x8, #1098907648 // =0x41800000
+; CHECK-FP16-SD-NEXT: movk x8, #16640, lsl #48
+; CHECK-FP16-SD-NEXT: fmov d1, x8
+; CHECK-FP16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-FP16-SD-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NO16-GI-LABEL: test9:
+; CHECK-NO16-GI: // %bb.0:
+; CHECK-NO16-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-NO16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; CHECK-NO16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NO16-GI-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NO16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: test9:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI8_0
+; CHECK-FP16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0]
+; CHECK-FP16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-FP16-GI-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
%vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
ret <2 x i32> %vcvt.i
@@ -684,13 +710,39 @@ define <2 x i32> @test8_sat(<2 x float> %f) {
; Test which should not fold due to non-matching power of 2.
define <2 x i32> @test9_sat(<2 x float> %f) {
-; CHECK-LABEL: test9_sat:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI27_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
-; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: fcvtzu v0.2s, v0.2s
-; CHECK-NEXT: ret
+; CHECK-NO16-SD-LABEL: test9_sat:
+; CHECK-NO16-SD: // %bb.0:
+; CHECK-NO16-SD-NEXT: mov x8, #1098907648 // =0x41800000
+; CHECK-NO16-SD-NEXT: movk x8, #16640, lsl #48
+; CHECK-NO16-SD-NEXT: fmov d1, x8
+; CHECK-NO16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NO16-SD-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NO16-SD-NEXT: ret
+;
+; CHECK-FP16-SD-LABEL: test9_sat:
+; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: mov x8, #1098907648 // =0x41800000
+; CHECK-FP16-SD-NEXT: movk x8, #16640, lsl #48
+; CHECK-FP16-SD-NEXT: fmov d1, x8
+; CHECK-FP16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-FP16-SD-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-FP16-SD-NEXT: ret
+;
+; CHECK-NO16-GI-LABEL: test9_sat:
+; CHECK-NO16-GI: // %bb.0:
+; CHECK-NO16-GI-NEXT: adrp x8, .LCPI27_0
+; CHECK-NO16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
+; CHECK-NO16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-NO16-GI-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-NO16-GI-NEXT: ret
+;
+; CHECK-FP16-GI-LABEL: test9_sat:
+; CHECK-FP16-GI: // %bb.0:
+; CHECK-FP16-GI-NEXT: adrp x8, .LCPI27_0
+; CHECK-FP16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI27_0]
+; CHECK-FP16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s
+; CHECK-FP16-GI-NEXT: fcvtzu v0.2s, v0.2s
+; CHECK-FP16-GI-NEXT: ret
%mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00>
%vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i)
ret <2 x i32> %vcvt.i
diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
index 6e24105..eb9d576 100644
--- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll
@@ -151,8 +151,9 @@ define <4 x i16> @interleave2_same_const_splat_v4i16() {
define <4 x i16> @interleave2_diff_const_splat_v4i16() {
; CHECK-SD-LABEL: interleave2_diff_const_splat_v4i16:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI11_0
-; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI11_0]
+; CHECK-SD-NEXT: mov x8, #1125899907104768 // =0x4000000040000
+; CHECK-SD-NEXT: orr x8, x8, #0x300000003
+; CHECK-SD-NEXT: fmov d0, x8
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: interleave2_diff_const_splat_v4i16:
diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
index e0406e4..931963e 100644
--- a/llvm/test/CodeGen/AArch64/neon-abd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -598,13 +598,14 @@ entry:
define <8 x i32> @sabd_8h_bv_imm(<8 x i16> %a) {
; CHECK-LABEL: sabd_8h_bv_imm:
; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov x9, #549747425280 // =0x7fff800000
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: adrp x8, .LCPI45_0
-; CHECK-NEXT: adrp x9, .LCPI45_1
+; CHECK-NEXT: movk x9, #69, lsl #48
; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI45_0]
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI45_1]
-; CHECK-NEXT: sabdl v0.4s, v0.4h, v3.4h
-; CHECK-NEXT: sabdl v1.4s, v1.4h, v2.4h
+; CHECK-NEXT: fmov d3, x9
+; CHECK-NEXT: sabdl v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: sabdl v1.4s, v1.4h, v3.4h
; CHECK-NEXT: ret
entry:
%conv = sext <8 x i16> %a to <8 x i32>
diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll
index 4f65786..ca5af2c 100644
--- a/llvm/test/CodeGen/AArch64/neon-mov.ll
+++ b/llvm/test/CodeGen/AArch64/neon-mov.ll
@@ -600,16 +600,18 @@ declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>)
define <2 x i32> @movi1d() {
; CHECK-NOFP16-SD-LABEL: movi1d:
; CHECK-NOFP16-SD: // %bb.0:
+; CHECK-NOFP16-SD-NEXT: mov x8, #9223231299366420480 // =0x7fff800000000000
; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000
-; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0
-; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
+; CHECK-NOFP16-SD-NEXT: movk x8, #32768, lsl #16
+; CHECK-NOFP16-SD-NEXT: fmov d0, x8
; CHECK-NOFP16-SD-NEXT: b test_movi1d
;
; CHECK-FP16-SD-LABEL: movi1d:
; CHECK-FP16-SD: // %bb.0:
+; CHECK-FP16-SD-NEXT: mov x8, #9223231299366420480 // =0x7fff800000000000
; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000
-; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0
-; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0]
+; CHECK-FP16-SD-NEXT: movk x8, #32768, lsl #16
+; CHECK-FP16-SD-NEXT: fmov d0, x8
; CHECK-FP16-SD-NEXT: b test_movi1d
;
; CHECK-NOFP16-GI-LABEL: movi1d:
diff --git a/llvm/test/CodeGen/AArch64/neon-stepvector.ll b/llvm/test/CodeGen/AArch64/neon-stepvector.ll
index 7a8a3c3..d5a3581 100644
--- a/llvm/test/CodeGen/AArch64/neon-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/neon-stepvector.ll
@@ -139,13 +139,10 @@ entry:
}
define <2 x i32> @stepvector_v2i32() {
-; CHECK-LABEL: .LCPI6_0:
-; CHECK-NEXT: .word 0
-; CHECK-NEXT: .word 1
; CHECK-LABEL: stepvector_v2i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x8, .LCPI6_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0]
+; CHECK-NEXT: mov x8, #4294967296 // =0x100000000
+; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ret
entry:
%0 = call <2 x i32> @llvm.stepvector.v2i32()
diff --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll
index f7efab1..0daeb9d 100644
--- a/llvm/test/CodeGen/AArch64/pr58350.ll
+++ b/llvm/test/CodeGen/AArch64/pr58350.ll
@@ -10,13 +10,13 @@ define void @f(<1 x float> %a, i64 %b) {
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: mov x9, #1056964608 // =0x3f000000
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: bfi x9, x0, #2, #1
-; CHECK-NEXT: str d1, [sp]
-; CHECK-NEXT: ldr s1, [x9]
+; CHECK-NEXT: bfi x8, x0, #2, #1
+; CHECK-NEXT: movk x9, #16256, lsl #48
+; CHECK-NEXT: str x9, [sp]
+; CHECK-NEXT: ldr s1, [x8]
; CHECK-NEXT: mov v1.s[1], v0.s[0]
; CHECK-NEXT: str d1, [sp, #8]
; CHECK-NEXT: add sp, sp, #16
diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
index 15e4887..eac7407 100644
--- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
+++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
@@ -244,18 +244,9 @@ define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x
ret <4 x i32> %z
}
-; CHECK: .LCPI6_0:
-; CHECK: .byte 0 // 0x0
-; CHECK: .byte 7 // 0x7
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 1 // 0x1
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
; CHECK: .section .rodata.cst16,"aM",@progbits,16
; CHECK: .p2align 4
-; CHECK: .LCPI6_1:
+; CHECK: .LCPI6_0:
; CHECK: .byte 0 // 0x0
; CHECK: .byte 16 // 0x10
; CHECK: .byte 19 // 0x13
@@ -276,15 +267,16 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8
; CHECK-LABEL: shuffle4_v8i8_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: mov x8, #-63744 // =0xffffffffffff0700
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: mov v2.d[1], v2.d[0]
+; CHECK-NEXT: movk x8, #511, lsl #16
; CHECK-NEXT: mov v0.d[1], v0.d[0]
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
-; CHECK-NEXT: adrp x8, .LCPI6_1
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: adrp x8, .LCPI6_0
; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b
; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1]
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_0]
; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT: ret
%x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1>
@@ -295,15 +287,6 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8
; CHECK: .LCPI7_0:
; CHECK: .byte 0 // 0x0
-; CHECK: .byte 7 // 0x7
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 1 // 0x1
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
-; CHECK: .LCPI7_1:
-; CHECK: .byte 0 // 0x0
; CHECK: .byte 8 // 0x8
; CHECK: .byte 11 // 0xb
; CHECK: .byte 3 // 0x3
@@ -316,14 +299,15 @@ define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: adrp x8, .LCPI7_0
+; CHECK-NEXT: mov x8, #-63744 // =0xffffffffffff0700
+; CHECK-NEXT: movk x8, #511, lsl #16
; CHECK-NEXT: mov v2.d[1], v2.d[0]
; CHECK-NEXT: mov v0.d[1], v0.d[0]
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0]
-; CHECK-NEXT: adrp x8, .LCPI7_1
+; CHECK-NEXT: fmov d1, x8
+; CHECK-NEXT: adrp x8, .LCPI7_0
; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_1]
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0]
; CHECK-NEXT: mov v0.d[1], v2.d[0]
; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECK-NEXT: ret
@@ -531,18 +515,9 @@ define <4 x i32> @shuffle3_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
ret <4 x i32> %z
}
-; CHECK: .LCPI14_0:
-; CHECK: .byte 4 // 0x4
-; CHECK: .byte 8 // 0x8
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 14 // 0xe
-; CHECK: .byte 3 // 0x3
-; CHECK: .byte 255 // 0xff
-; CHECK: .byte 255 // 0xff
; CHECK: .section .rodata.cst16,"aM",@progbits,16
; CHECK: .p2align 4
-; CHECK: .LCPI14_1:
+; CHECK: .LCPI14_0:
; CHECK: .byte 255 // 0xff
; CHECK: .byte 255 // 0xff
; CHECK: .byte 15 // 0xf
@@ -563,16 +538,17 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8>
; CHECK-LABEL: insert4_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov x9, #-63484 // =0xffffffffffff0804
; CHECK-NEXT: mov v4.16b, v3.16b
; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: adrp x9, .LCPI14_1
; CHECK-NEXT: mov v0.d[1], v2.d[0]
+; CHECK-NEXT: movk x9, #782, lsl #32
; CHECK-NEXT: mov v3.16b, v1.16b
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1]
-; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
-; CHECK-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v2.16b
+; CHECK-NEXT: fmov d2, x9
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b
+; CHECK-NEXT: tbl v0.8b, { v0.16b }, v2.8b
; CHECK-NEXT: trn1 v0.4h, v1.4h, v0.4h
; CHECK-NEXT: trn2 v0.4h, v0.4h, v1.4h
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
index 41dd7f0..930c3df 100644
--- a/llvm/test/CodeGen/AArch64/shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -339,10 +339,11 @@ define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
; CHECKLE-LABEL: test_shuf8:
; CHECKLE: // %bb.0:
; CHECKLE-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECKLE-NEXT: mov x8, #8830452760576 // =0x80800000000
; CHECKLE-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECKLE-NEXT: adrp x8, .LCPI12_0
; CHECKLE-NEXT: mov v0.d[1], v1.d[0]
-; CHECKLE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
+; CHECKLE-NEXT: movk x8, #2056, lsl #48
+; CHECKLE-NEXT: fmov d1, x8
; CHECKLE-NEXT: tbl v0.8b, { v0.16b }, v1.8b
; CHECKLE-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
index b165ac0..52b0956 100644
--- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll
@@ -4,18 +4,18 @@
define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) {
; CHECK-LABEL: fold_srem_vec_1:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_1
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1]
; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT: adrp x8, .LCPI0_2
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: mov x8, #-281474976710655 // =0xffff000000000001
+; CHECK-NEXT: fmov d2, x8
+; CHECK-NEXT: adrp x8, .LCPI0_1
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2]
-; CHECK-NEXT: adrp x8, .LCPI0_3
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1]
+; CHECK-NEXT: adrp x8, .LCPI0_2
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: usra v1.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
@@ -66,16 +66,17 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) {
define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) {
; CHECK-LABEL: dont_fold_srem_power_of_two:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-9222949817947160575 // =0x8001800180018001
+; CHECK-NEXT: movk x8, #44151, lsl #48
+; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: adrp x8, .LCPI3_1
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1]
-; CHECK-NEXT: adrp x8, .LCPI3_2
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: add v1.4h, v1.4h, v0.4h
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: usra v1.4h, v1.4h, #15
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
@@ -112,21 +113,22 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) {
define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) {
; CHECK-LABEL: dont_fold_srem_i16_smax:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI5_1
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_1]
; CHECK-NEXT: adrp x8, .LCPI5_0
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_0]
-; CHECK-NEXT: adrp x8, .LCPI5_2
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_0]
+; CHECK-NEXT: mov x8, #8589869056 // =0x1ffff0000
+; CHECK-NEXT: movk x8, #1
; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h
+; CHECK-NEXT: fmov d2, x8
+; CHECK-NEXT: adrp x8, .LCPI5_1
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2]
-; CHECK-NEXT: adrp x8, .LCPI5_3
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_1]
+; CHECK-NEXT: adrp x8, .LCPI5_2
; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h
; CHECK-NEXT: ushr v2.4h, v1.4h, #15
; CHECK-NEXT: mov v2.h[0], wzr
; CHECK-NEXT: add v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_3]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2]
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
%1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423>
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll
index c4f6e7d..d3a8540 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll
@@ -66,23 +66,24 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; CHECK-LABEL: test_urem_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: mov x8, #65536 // =0x10000
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: adrp x8, .LCPI4_1
; CHECK-NEXT: mov v0.h[1], w1
; CHECK-NEXT: mov v0.h[2], w2
; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1]
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: adrp x8, .LCPI4_1
+; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1]
; CHECK-NEXT: adrp x8, .LCPI4_2
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2]
-; CHECK-NEXT: adrp x8, .LCPI4_3
; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT: movi d1, #0x0000000000ffff
; CHECK-NEXT: add v2.4h, v0.4h, v0.4h
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h
; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3]
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b
; CHECK-NEXT: bic v0.4h, #248, lsl #8
; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h
diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
index 4be8c37..7c76479 100644
--- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll
@@ -4,11 +4,11 @@
define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; CHECK-LABEL: fold_urem_vec_1:
; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #281474976579584 // =0xfffffffe0000
+; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: adrp x8, .LCPI0_1
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1]
-; CHECK-NEXT: adrp x8, .LCPI0_2
; CHECK-NEXT: ushl v1.4h, v0.4h, v1.4h
; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h
; CHECK-NEXT: movi d2, #0000000000000000
@@ -18,10 +18,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) {
; CHECK-NEXT: umull v2.4s, v3.4h, v2.4h
; CHECK-NEXT: shrn v2.4h, v2.4s, #16
; CHECK-NEXT: add v1.4h, v2.4h, v1.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2]
-; CHECK-NEXT: adrp x8, .LCPI0_3
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1]
+; CHECK-NEXT: adrp x8, .LCPI0_2
; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2]
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
%1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
@@ -70,13 +70,13 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: mov x8, #-1688849860263936 // =0xfffa000000000000
+; CHECK-NEXT: fmov d2, x8
; CHECK-NEXT: adrp x8, .LCPI3_1
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1]
-; CHECK-NEXT: adrp x8, .LCPI3_2
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
%1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95>
@@ -89,9 +89,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: mov x8, #140737488355328 // =0x800000000000
+; CHECK-NEXT: fmov d3, x8
; CHECK-NEXT: adrp x8, .LCPI4_1
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1]
-; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h
; CHECK-NEXT: shrn v1.4h, v1.4s, #16
; CHECK-NEXT: sub v2.4h, v0.4h, v1.4h
@@ -99,10 +99,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) {
; CHECK-NEXT: movi d3, #0x0000000000ffff
; CHECK-NEXT: shrn v2.4h, v2.4s, #16
; CHECK-NEXT: add v1.4h, v2.4h, v1.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
-; CHECK-NEXT: adrp x8, .LCPI4_3
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1]
+; CHECK-NEXT: adrp x8, .LCPI4_2
; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3]
+; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2]
; CHECK-NEXT: bit v1.8b, v0.8b, v3.8b
; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index f584595..af8cda4 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -558,9 +558,10 @@ define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) {
; CHECK-LABEL: convert_to_bitmask_2xi32:
; CHECK: ; %bb.0:
-; CHECK-NEXT: adrp x8, lCPI11_0@PAGE
+; CHECK-NEXT: mov x8, #1 ; =0x1
; CHECK-NEXT: cmeq.2s v0, v0, #0
-; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF]
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addp.2s v0, v0, v0
; CHECK-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll
index 3c42079..f3c4856 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll
@@ -169,16 +169,14 @@ define void @store_4_elements_64_bit_vector(<4 x i16> %vec, ptr %out) {
define void @store_2_elements_64_bit_vector(<2 x i32> %vec, ptr %out) {
; CHECK-LABEL: store_2_elements_64_bit_vector:
; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh16:
-; CHECK-NEXT: adrp x8, lCPI8_0@PAGE
+; CHECK-NEXT: mov x8, #1 ; =0x1
; CHECK-NEXT: cmeq.2s v0, v0, #0
-; CHECK-NEXT: Lloh17:
-; CHECK-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF]
+; CHECK-NEXT: movk x8, #2, lsl #32
+; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addp.2s v0, v0, v0
; CHECK-NEXT: str b0, [x0]
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh17
%cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
index 81382ee..876d2f6 100644
--- a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
+++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll
@@ -152,9 +152,9 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
-; NEON-FIXED-NEXT: adrp x8, .LCPI3_0
+; NEON-FIXED-NEXT: mov x8, #4294967296 // =0x100000000
; NEON-FIXED-NEXT: mov x9, sp
-; NEON-FIXED-NEXT: ldr d3, [x8, :lo12:.LCPI3_0]
+; NEON-FIXED-NEXT: fmov d3, x8
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v2.2s, v1.2d
; NEON-FIXED-NEXT: umaxv s1, v1.4s
@@ -348,9 +348,9 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %
; NEON-FIXED-NEXT: sub sp, sp, #16
; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d
-; NEON-FIXED-NEXT: adrp x8, .LCPI7_0
+; NEON-FIXED-NEXT: mov x8, #4294967296 // =0x100000000
; NEON-FIXED-NEXT: mov x9, sp
-; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI7_0]
+; NEON-FIXED-NEXT: fmov d4, x8
; NEON-FIXED-NEXT: str q0, [sp]
; NEON-FIXED-NEXT: xtn v3.2s, v1.2d
; NEON-FIXED-NEXT: umaxv s1, v1.4s