diff options
21 files changed, 280 insertions, 171 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 713b40d..22418e6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -16379,6 +16379,45 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, return Val; } + // Handle 64-bit constant BUILD_VECTORs by packing them into an i64 immediate. + // This is cheaper than a load if the immediate can be materialized in a few + // mov instructions. This optimization is disabled for big-endian targets for + // now. + if (BVN->isConstant() && VT.isFixedLengthVector() && + VT.getSizeInBits() == 64 && !DAG.getDataLayout().isBigEndian()) { + const SDLoc DL(Op); + APInt PackedVal(64, 0); + unsigned BitPos = 0; + + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + for (unsigned i = 0, e = BVN->getNumOperands(); i != e; ++i) { + const SDValue &LaneOp = BVN->getOperand(i); + APInt LaneBits; + if (LaneOp.getOpcode() == ISD::UNDEF) + LaneBits = APInt(EltSizeInBits, 0); + else if (auto *C = dyn_cast<ConstantSDNode>(LaneOp)) + LaneBits = C->getAPIntValue(); + else if (auto *CFP = dyn_cast<ConstantFPSDNode>(LaneOp)) + LaneBits = CFP->getValueAPF().bitcastToAPInt(); + else + return SDValue(); + + PackedVal |= LaneBits.trunc(VT.getScalarSizeInBits()).zext(64) << BitPos; + BitPos += EltSizeInBits; + } + + // This optimization kicks in only if expandMOVImm can materialize the + // packed immediate in at most 2 instructions. + SmallVector<AArch64_IMM::ImmInsnModel, 4> Insns; + AArch64_IMM::expandMOVImm(PackedVal.getZExtValue(), 64, Insns); + if (Insns.size() > 2) + return SDValue(); + + SDValue ScalarConst = DAG.getConstant(PackedVal, DL, MVT::i64); + // Use BITCAST to reinterpret the scalar constant's bits as a vector. + return DAG.getNode(ISD::BITCAST, DL, VT, ScalarConst); + } + // This will generate a load from the constant pool.
if (isConstant) { LLVM_DEBUG( diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll index 7483eab..34499c6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-simd-ldst-one.ll @@ -140,12 +140,20 @@ entry: } define <2 x i32> @test_ld_from_poll_v2i32(<2 x i32> %a) { -; CHECK-LABEL: test_ld_from_poll_v2i32: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_ld_from_poll_v2i32: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov x8, #1 // =0x1 +; CHECK-SD-NEXT: movk x8, #2, lsl #32 +; CHECK-SD-NEXT: fmov d1, x8 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_ld_from_poll_v2i32: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: adrp x8, .LCPI8_0 +; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: ret entry: %b = add <2 x i32> %a, <i32 1, i32 2> ret <2 x i32> %b diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll index 0e1e15f..e80afe2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -684,15 +684,26 @@ define void @testLeftShouldNotCreateSLI1x128(<1 x i128> %src1, <1 x i128> %src2, } define void @testLeftNotAllConstantBuildVec8x8(<8 x i8> %src1, <8 x i8> %src2, ptr %dest) nounwind { -; CHECK-LABEL: testLeftNotAllConstantBuildVec8x8: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI29_0 -; CHECK-NEXT: shl.8b v1, v1, #3 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI29_0] -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v1 -; CHECK-NEXT: str d0, [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: testLeftNotAllConstantBuildVec8x8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: mov x8, 
#506381209866536711 // =0x707070707070707 +; CHECK-SD-NEXT: shl.8b v1, v1, #3 +; CHECK-SD-NEXT: orr x8, x8, #0x7f8000007f80000 +; CHECK-SD-NEXT: fmov d2, x8 +; CHECK-SD-NEXT: and.8b v0, v0, v2 +; CHECK-SD-NEXT: orr.8b v0, v0, v1 +; CHECK-SD-NEXT: str d0, [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: testLeftNotAllConstantBuildVec8x8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI29_0 +; CHECK-GI-NEXT: shl.8b v1, v1, #3 +; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI29_0] +; CHECK-GI-NEXT: and.8b v0, v0, v2 +; CHECK-GI-NEXT: orr.8b v0, v0, v1 +; CHECK-GI-NEXT: str d0, [x0] +; CHECK-GI-NEXT: ret %and.i = and <8 x i8> %src1, <i8 7, i8 7, i8 255, i8 7, i8 7, i8 7, i8 255, i8 7> %vshl_n = shl <8 x i8> %src2, <i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3, i8 3> %result = or <8 x i8> %and.i, %vshl_n diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll index fe5a6f1..27694bd 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll @@ -140,16 +140,6 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> % ret <16 x i8> %tmp3 } -; CHECK-SD-LABEL: .LCPI8_0: -; CHECK-SD: .byte 0 // 0x0 -; CHECK-SD-NEXT: .byte 4 // 0x4 -; CHECK-SD-NEXT: .byte 8 // 0x8 -; CHECK-SD-NEXT: .byte 12 // 0xc -; CHECK-SD-NEXT: .byte 255 // 0xff -; CHECK-SD-NEXT: .byte 255 // 0xff -; CHECK-SD-NEXT: .byte 255 // 0xff -; CHECK-SD-NEXT: .byte 255 // 0xff - ; CHECK-GI-LABEL: .LCPI8_0: ; CHECK-GI: .byte 0 // 0x0 ; CHECK-GI-NEXT: .byte 1 // 0x1 @@ -172,12 +162,13 @@ define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> % define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI8_0 +; CHECK-SD-NEXT: mov x8, #-64512 // =0xffffffffffff0400 ; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def 
$q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] +; CHECK-SD-NEXT: movk x8, #3080, lsl #16 ; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: fmov d4, x8 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-SD-NEXT: mov.s v0[1], v1[1] diff --git a/llvm/test/CodeGen/AArch64/const-vector-big-endian.ll b/llvm/test/CodeGen/AArch64/const-vector-big-endian.ll new file mode 100644 index 0000000..0b49b7f --- /dev/null +++ b/llvm/test/CodeGen/AArch64/const-vector-big-endian.ll @@ -0,0 +1,27 @@ +; Verify that 64-bit constant vectors are not materialized via mov on big-endian targets +; RUN: llc -mtriple=aarch64_be-linux-gnu -o - %s | FileCheck %s + +define <2 x i32> @test_const_v2i32_big_endian() { +; CHECK-LABEL: test_const_v2i32_big_endian: +; CHECK: ldr d0, [x8, :lo12:.LCPI0_0] +; CHECK-NOT: mov +; CHECK: ret + ret <2 x i32> <i32 1, i32 2> +} + +define <4 x i16> @test_const_v4i16_big_endian() { +; CHECK-LABEL: test_const_v4i16_big_endian: +; CHECK: ldr d0, [x8, :lo12:.LCPI1_0] +; CHECK-NOT: mov +; CHECK: ret + ret <4 x i16> <i16 1, i16 2, i16 3, i16 4> +} + +define <8 x i8> @test_const_v8i8_big_endian() { +; CHECK-LABEL: test_const_v8i8_big_endian: +; CHECK: ldr d0, [x8, :lo12:.LCPI2_0] +; CHECK-NOT: mov +; CHECK: ret + ret <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8> +} + diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll index 9f4b3e2..7e74ab4 100644 --- a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll +++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll @@ -30,27 +30,27 @@ ; CHECK-NEXT: .section .rodata.cst8,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI0_2: -; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 1 // 0x1 +; CHECK-NEXT: .byte 2 // 0x2 +; CHECK-NEXT: .byte 3 // 0x3 ; CHECK-NEXT: .byte 4 //
0x4 +; CHECK-NEXT: .byte 5 // 0x5 +; CHECK-NEXT: .byte 6 // 0x6 +; CHECK-NEXT: .byte 7 // 0x7 ; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff ;; Constant pools for function @unprofiled_func ; CHECK: .section .rodata.cst8,"aM",@progbits,8 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI1_0: -; CHECK-NEXT: .byte 0 // 0x0 +; CHECK-NEXT: .byte 1 // 0x1 +; CHECK-NEXT: .byte 2 // 0x2 +; CHECK-NEXT: .byte 3 // 0x3 ; CHECK-NEXT: .byte 4 // 0x4 +; CHECK-NEXT: .byte 5 // 0x5 +; CHECK-NEXT: .byte 6 // 0x6 +; CHECK-NEXT: .byte 7 // 0x7 ; CHECK-NEXT: .byte 8 // 0x8 -; CHECK-NEXT: .byte 12 // 0xc -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff -; CHECK-NEXT: .byte 255 // 0xff ; CHECK-NEXT: .section .rodata.cst16,"aM",@progbits,16 ; CHECK-NEXT: .p2align ; CHECK-NEXT: .LCPI1_1: @@ -94,7 +94,7 @@ define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 { %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01) %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01) - %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>) %t2 = bitcast <8 x i8> %t1 to <2 x i32> %3 = extractelement <2 x i32> %t2, i32 1 %sum = add i32 %2, %3 @@ -106,7 +106,7 @@ declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) declare i32 @func_taking_arbitrary_param(...) 
define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) { - %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>) + %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>) %t2 = bitcast <8 x i8> %t1 to <4 x i16> %t3 = zext <4 x i16> %t2 to <4 x i32> %t4 = add <4 x i32> %t3, <i32 2, i32 3, i32 5, i32 7> diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index babb4ed..23bc6e0 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -1037,10 +1037,11 @@ entry: define i32 @extract_v4i32_phi(i64 %val, i32 %limit, ptr %ptr) { ; CHECK-SD-LABEL: extract_v4i32_phi: ; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: mov x8, #1 // =0x1 ; CHECK-SD-NEXT: dup v1.2s, w0 -; CHECK-SD-NEXT: adrp x8, .LCPI41_0 ; CHECK-SD-NEXT: movi v0.2s, #16 -; CHECK-SD-NEXT: ldr d2, [x8, :lo12:.LCPI41_0] +; CHECK-SD-NEXT: movk x8, #2, lsl #32 +; CHECK-SD-NEXT: fmov d2, x8 ; CHECK-SD-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-SD-NEXT: .LBB41_1: // %loop ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll index 38defb6..c190c63 100644 --- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -141,13 +141,39 @@ define <2 x i32> @test8(<2 x float> %f) { ; Test which should not fold due to non-matching power of 2. 
define <2 x i32> @test9(<2 x float> %f) { -; CHECK-LABEL: test9: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s -; CHECK-NEXT: ret +; CHECK-NO16-SD-LABEL: test9: +; CHECK-NO16-SD: // %bb.0: +; CHECK-NO16-SD-NEXT: mov x8, #1098907648 // =0x41800000 +; CHECK-NO16-SD-NEXT: movk x8, #16640, lsl #48 +; CHECK-NO16-SD-NEXT: fmov d1, x8 +; CHECK-NO16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-NO16-SD-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NO16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test9: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: mov x8, #1098907648 // =0x41800000 +; CHECK-FP16-SD-NEXT: movk x8, #16640, lsl #48 +; CHECK-FP16-SD-NEXT: fmov d1, x8 +; CHECK-FP16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-FP16-SD-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NO16-GI-LABEL: test9: +; CHECK-NO16-GI: // %bb.0: +; CHECK-NO16-GI-NEXT: adrp x8, .LCPI8_0 +; CHECK-NO16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; CHECK-NO16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-NO16-GI-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NO16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test9: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI8_0 +; CHECK-FP16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI8_0] +; CHECK-FP16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-FP16-GI-NEXT: ret %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00> %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32> ret <2 x i32> %vcvt.i @@ -684,13 +710,39 @@ define <2 x i32> @test8_sat(<2 x float> %f) { ; Test which should not fold due to non-matching power of 2. 
define <2 x i32> @test9_sat(<2 x float> %f) { -; CHECK-LABEL: test9_sat: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI27_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI27_0] -; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: fcvtzu v0.2s, v0.2s -; CHECK-NEXT: ret +; CHECK-NO16-SD-LABEL: test9_sat: +; CHECK-NO16-SD: // %bb.0: +; CHECK-NO16-SD-NEXT: mov x8, #1098907648 // =0x41800000 +; CHECK-NO16-SD-NEXT: movk x8, #16640, lsl #48 +; CHECK-NO16-SD-NEXT: fmov d1, x8 +; CHECK-NO16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-NO16-SD-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NO16-SD-NEXT: ret +; +; CHECK-FP16-SD-LABEL: test9_sat: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: mov x8, #1098907648 // =0x41800000 +; CHECK-FP16-SD-NEXT: movk x8, #16640, lsl #48 +; CHECK-FP16-SD-NEXT: fmov d1, x8 +; CHECK-FP16-SD-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-FP16-SD-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-FP16-SD-NEXT: ret +; +; CHECK-NO16-GI-LABEL: test9_sat: +; CHECK-NO16-GI: // %bb.0: +; CHECK-NO16-GI-NEXT: adrp x8, .LCPI27_0 +; CHECK-NO16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI27_0] +; CHECK-NO16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-NO16-GI-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NO16-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test9_sat: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: adrp x8, .LCPI27_0 +; CHECK-FP16-GI-NEXT: ldr d1, [x8, :lo12:.LCPI27_0] +; CHECK-FP16-GI-NEXT: fmul v0.2s, v0.2s, v1.2s +; CHECK-FP16-GI-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-FP16-GI-NEXT: ret %mul.i = fmul <2 x float> %f, <float 16.000000e+00, float 8.000000e+00> %vcvt.i = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> %mul.i) ret <2 x i32> %vcvt.i diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll index 6e24105..eb9d576 100644 --- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll @@ -151,8 +151,9 @@ define <4 x i16> @interleave2_same_const_splat_v4i16() { define <4 x 
i16> @interleave2_diff_const_splat_v4i16() { ; CHECK-SD-LABEL: interleave2_diff_const_splat_v4i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI11_0 -; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI11_0] +; CHECK-SD-NEXT: mov x8, #1125899907104768 // =0x4000000040000 +; CHECK-SD-NEXT: orr x8, x8, #0x300000003 +; CHECK-SD-NEXT: fmov d0, x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: interleave2_diff_const_splat_v4i16: diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll index e0406e4..931963e 100644 --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -598,13 +598,14 @@ entry: define <8 x i32> @sabd_8h_bv_imm(<8 x i16> %a) { ; CHECK-LABEL: sabd_8h_bv_imm: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x9, #549747425280 // =0x7fff800000 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: adrp x8, .LCPI45_0 -; CHECK-NEXT: adrp x9, .LCPI45_1 +; CHECK-NEXT: movk x9, #69, lsl #48 ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI45_0] -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI45_1] -; CHECK-NEXT: sabdl v0.4s, v0.4h, v3.4h -; CHECK-NEXT: sabdl v1.4s, v1.4h, v2.4h +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: sabdl v0.4s, v0.4h, v2.4h +; CHECK-NEXT: sabdl v1.4s, v1.4h, v3.4h ; CHECK-NEXT: ret entry: %conv = sext <8 x i16> %a to <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll index 4f65786..ca5af2c 100644 --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -600,16 +600,18 @@ declare <2 x i32> @test_movi1d(<2 x i32>, <2 x i32>) define <2 x i32> @movi1d() { ; CHECK-NOFP16-SD-LABEL: movi1d: ; CHECK-NOFP16-SD: // %bb.0: +; CHECK-NOFP16-SD-NEXT: mov x8, #9223231299366420480 // =0x7fff800000000000 ; CHECK-NOFP16-SD-NEXT: movi d1, #0x00ffffffff0000 -; CHECK-NOFP16-SD-NEXT: adrp x8, .LCPI57_0 -; CHECK-NOFP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0] +; CHECK-NOFP16-SD-NEXT: movk x8, #32768, lsl #16 +; CHECK-NOFP16-SD-NEXT: fmov d0, 
x8 ; CHECK-NOFP16-SD-NEXT: b test_movi1d ; ; CHECK-FP16-SD-LABEL: movi1d: ; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: mov x8, #9223231299366420480 // =0x7fff800000000000 ; CHECK-FP16-SD-NEXT: movi d1, #0x00ffffffff0000 -; CHECK-FP16-SD-NEXT: adrp x8, .LCPI57_0 -; CHECK-FP16-SD-NEXT: ldr d0, [x8, :lo12:.LCPI57_0] +; CHECK-FP16-SD-NEXT: movk x8, #32768, lsl #16 +; CHECK-FP16-SD-NEXT: fmov d0, x8 ; CHECK-FP16-SD-NEXT: b test_movi1d ; ; CHECK-NOFP16-GI-LABEL: movi1d: diff --git a/llvm/test/CodeGen/AArch64/neon-stepvector.ll b/llvm/test/CodeGen/AArch64/neon-stepvector.ll index 7a8a3c3..d5a3581 100644 --- a/llvm/test/CodeGen/AArch64/neon-stepvector.ll +++ b/llvm/test/CodeGen/AArch64/neon-stepvector.ll @@ -139,13 +139,10 @@ entry: } define <2 x i32> @stepvector_v2i32() { -; CHECK-LABEL: .LCPI6_0: -; CHECK-NEXT: .word 0 -; CHECK-NEXT: .word 1 ; CHECK-LABEL: stepvector_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI6_0 -; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI6_0] +; CHECK-NEXT: mov x8, #4294967296 // =0x100000000 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret entry: %0 = call <2 x i32> @llvm.stepvector.v2i32() diff --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll index f7efab1..0daeb9d 100644 --- a/llvm/test/CodeGen/AArch64/pr58350.ll +++ b/llvm/test/CodeGen/AArch64/pr58350.ll @@ -10,13 +10,13 @@ define void @f(<1 x float> %a, i64 %b) { ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov x9, #1056964608 // =0x3f000000 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: bfi x9, x0, #2, #1 -; CHECK-NEXT: str d1, [sp] -; CHECK-NEXT: ldr s1, [x9] +; CHECK-NEXT: bfi x8, x0, #2, #1 +; CHECK-NEXT: movk x9, #16256, lsl #48 +; CHECK-NEXT: str x9, [sp] +; CHECK-NEXT: ldr s1, [x8] ; CHECK-NEXT: mov v1.s[1], v0.s[0] ; CHECK-NEXT: str d1, 
[sp, #8] ; CHECK-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index 15e4887..eac7407 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -244,18 +244,9 @@ define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x ret <4 x i32> %z } -; CHECK: .LCPI6_0: -; CHECK: .byte 0 // 0x0 -; CHECK: .byte 7 // 0x7 -; CHECK: .byte 255 // 0xff -; CHECK: .byte 1 // 0x1 -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff ; CHECK: .section .rodata.cst16,"aM",@progbits,16 ; CHECK: .p2align 4 -; CHECK: .LCPI6_1: +; CHECK: .LCPI6_0: ; CHECK: .byte 0 // 0x0 ; CHECK: .byte 16 // 0x10 ; CHECK: .byte 19 // 0x13 @@ -276,15 +267,16 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 ; CHECK-LABEL: shuffle4_v8i8_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov x8, #-63744 // =0xffffffffffff0700 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: mov v2.d[1], v2.d[0] +; CHECK-NEXT: movk x8, #511, lsl #16 ; CHECK-NEXT: mov v0.d[1], v0.d[0] -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: adrp x8, .LCPI6_1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b ; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1] +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> <i32 0, i32 7, i32 5, i32 1> @@ -295,15 +287,6 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 ; CHECK: .LCPI7_0: ; CHECK: .byte 0 // 0x0 -; CHECK: .byte 7 // 0x7 -; CHECK: .byte 255 // 0xff -; CHECK: .byte 1 // 0x1 -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 
0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .LCPI7_1: -; CHECK: .byte 0 // 0x0 ; CHECK: .byte 8 // 0x8 ; CHECK: .byte 11 // 0xb ; CHECK: .byte 3 // 0x3 @@ -316,14 +299,15 @@ define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: adrp x8, .LCPI7_0 +; CHECK-NEXT: mov x8, #-63744 // =0xffffffffffff0700 +; CHECK-NEXT: movk x8, #511, lsl #16 ; CHECK-NEXT: mov v2.d[1], v2.d[0] ; CHECK-NEXT: mov v0.d[1], v0.d[0] -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: adrp x8, .LCPI7_1 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_1] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-NEXT: ret @@ -531,18 +515,9 @@ define <4 x i32> @shuffle3_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ret <4 x i32> %z } -; CHECK: .LCPI14_0: -; CHECK: .byte 4 // 0x4 -; CHECK: .byte 8 // 0x8 -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff -; CHECK: .byte 14 // 0xe -; CHECK: .byte 3 // 0x3 -; CHECK: .byte 255 // 0xff -; CHECK: .byte 255 // 0xff ; CHECK: .section .rodata.cst16,"aM",@progbits,16 ; CHECK: .p2align 4 -; CHECK: .LCPI14_1: +; CHECK: .LCPI14_0: ; CHECK: .byte 255 // 0xff ; CHECK: .byte 255 // 0xff ; CHECK: .byte 15 // 0xf @@ -563,16 +538,17 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> ; CHECK-LABEL: insert4_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: mov x9, #-63484 // =0xffffffffffff0804 ; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: 
movk x9, #782, lsl #32 ; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1] -; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b -; CHECK-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v2.16b +; CHECK-NEXT: fmov d2, x9 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-NEXT: tbl v0.8b, { v0.16b }, v2.8b ; CHECK-NEXT: trn1 v0.4h, v1.4h, v0.4h ; CHECK-NEXT: trn2 v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll index 41dd7f0..930c3df 100644 --- a/llvm/test/CodeGen/AArch64/shuffles.ll +++ b/llvm/test/CodeGen/AArch64/shuffles.ll @@ -339,10 +339,11 @@ define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b) ; CHECKLE-LABEL: test_shuf8: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECKLE-NEXT: mov x8, #8830452760576 // =0x80800000000 ; CHECKLE-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECKLE-NEXT: adrp x8, .LCPI12_0 ; CHECKLE-NEXT: mov v0.d[1], v1.d[0] -; CHECKLE-NEXT: ldr d1, [x8, :lo12:.LCPI12_0] +; CHECKLE-NEXT: movk x8, #2056, lsl #48 +; CHECKLE-NEXT: fmov d1, x8 ; CHECKLE-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECKLE-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll index b165ac0..52b0956 100644 --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -4,18 +4,18 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_2 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: mov x8, #-281474976710655 // =0xffff000000000001 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: smull 
v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] -; CHECK-NEXT: adrp x8, .LCPI0_3 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] ; CHECK-NEXT: usra v1.4h, v1.4h, #15 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret @@ -66,16 +66,17 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_power_of_two: ; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #-9222949817947160575 // =0x8001800180018001 +; CHECK-NEXT: movk x8, #44151, lsl #48 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: adrp x8, .LCPI3_2 ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v0.4h ; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: usra v1.4h, v1.4h, #15 ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret @@ -112,21 +113,22 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI5_1 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_1] ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: adrp x8, .LCPI5_2 +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: mov x8, #8589869056 // =0x1ffff0000 +; CHECK-NEXT: movk x8, #1 ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: adrp x8, .LCPI5_1 ; CHECK-NEXT: shrn v1.4h, v1.4s, 
#16 ; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2] -; CHECK-NEXT: adrp x8, .LCPI5_3 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_1] +; CHECK-NEXT: adrp x8, .LCPI5_2 ; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h ; CHECK-NEXT: ushr v2.4h, v1.4h, #15 ; CHECK-NEXT: mov v2.h[0], wzr ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_3] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2] ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, <i16 1, i16 32768, i16 23, i16 5423> diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll index c4f6e7d..d3a8540 100644 --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -66,23 +66,24 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind { ; CHECK-LABEL: test_urem_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: mov x8, #65536 // =0x10000 +; CHECK-NEXT: movk x8, #2, lsl #32 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: adrp x8, .LCPI4_1 ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1] ; CHECK-NEXT: adrp x8, .LCPI4_2 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2] -; CHECK-NEXT: adrp x8, .LCPI4_3 ; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h ; CHECK-NEXT: movi d1, #0x0000000000ffff ; CHECK-NEXT: add v2.4h, v0.4h, v0.4h ; CHECK-NEXT: bic v0.4h, #248, lsl #8 ; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h ; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-NEXT: bic v0.4h, #248, lsl #8 ; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h diff 
--git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll index 4be8c37..7c76479 100644 --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -4,11 +4,11 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, #281474976579584 // =0xfffffffe0000 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: ushl v1.4h, v0.4h, v1.4h ; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h ; CHECK-NEXT: movi d2, #0000000000000000 @@ -18,10 +18,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; CHECK-NEXT: umull v2.4s, v3.4h, v2.4h ; CHECK-NEXT: shrn v2.4h, v2.4s, #16 ; CHECK-NEXT: add v1.4h, v2.4h, v1.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] -; CHECK-NEXT: adrp x8, .LCPI0_3 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003> @@ -70,13 +70,13 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: mov x8, #-1688849860263936 // =0xfffa000000000000 +; CHECK-NEXT: fmov d2, x8 ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: adrp x8, .LCPI3_2 ; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; 
CHECK-NEXT: ret %1 = urem <4 x i16> %x, <i16 64, i16 32, i16 8, i16 95> @@ -89,9 +89,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: mov x8, #140737488355328 // =0x800000000000 +; CHECK-NEXT: fmov d3, x8 ; CHECK-NEXT: adrp x8, .LCPI4_1 -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: sub v2.4h, v0.4h, v1.4h @@ -99,10 +99,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; CHECK-NEXT: movi d3, #0x0000000000ffff ; CHECK-NEXT: shrn v2.4h, v2.4s, #16 ; CHECK-NEXT: add v1.4h, v2.4h, v1.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] -; CHECK-NEXT: adrp x8, .LCPI4_3 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: bit v1.8b, v0.8b, v3.8b ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll index f584595..af8cda4 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -558,9 +558,10 @@ define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) { define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) { ; CHECK-LABEL: convert_to_bitmask_2xi32: ; CHECK: ; %bb.0: -; CHECK-NEXT: adrp x8, lCPI11_0@PAGE +; CHECK-NEXT: mov x8, #1 ; =0x1 ; CHECK-NEXT: cmeq.2s v0, v0, #0 -; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF] +; CHECK-NEXT: movk x8, #2, lsl #32 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: bic.8b v0, v1, v0 ; CHECK-NEXT: addp.2s v0, v0, v0 ; CHECK-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll 
b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll index 3c42079..f3c4856 100644 --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll @@ -169,16 +169,14 @@ define void @store_4_elements_64_bit_vector(<4 x i16> %vec, ptr %out) { define void @store_2_elements_64_bit_vector(<2 x i32> %vec, ptr %out) { ; CHECK-LABEL: store_2_elements_64_bit_vector: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh16: -; CHECK-NEXT: adrp x8, lCPI8_0@PAGE +; CHECK-NEXT: mov x8, #1 ; =0x1 ; CHECK-NEXT: cmeq.2s v0, v0, #0 -; CHECK-NEXT: Lloh17: -; CHECK-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF] +; CHECK-NEXT: movk x8, #2, lsl #32 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: bic.8b v0, v1, v0 ; CHECK-NEXT: addp.2s v0, v0, v0 ; CHECK-NEXT: str b0, [x0] ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh17 %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll index 81382ee..876d2f6 100644 --- a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll @@ -152,9 +152,9 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; NEON-FIXED-NEXT: sub sp, sp, #16 ; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16 ; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d -; NEON-FIXED-NEXT: adrp x8, .LCPI3_0 +; NEON-FIXED-NEXT: mov x8, #4294967296 // =0x100000000 ; NEON-FIXED-NEXT: mov x9, sp -; NEON-FIXED-NEXT: ldr d3, [x8, :lo12:.LCPI3_0] +; NEON-FIXED-NEXT: fmov d3, x8 ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v2.2s, v1.2d ; NEON-FIXED-NEXT: umaxv s1, v1.4s @@ -348,9 +348,9 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; NEON-FIXED-NEXT: sub sp, sp, #16 ; NEON-FIXED-NEXT: .cfi_def_cfa_offset 16 ; NEON-FIXED-NEXT: cmtst v1.2d, v1.2d, v1.2d -; NEON-FIXED-NEXT: adrp x8, 
.LCPI7_0 +; NEON-FIXED-NEXT: mov x8, #4294967296 // =0x100000000 ; NEON-FIXED-NEXT: mov x9, sp -; NEON-FIXED-NEXT: ldr d4, [x8, :lo12:.LCPI7_0] +; NEON-FIXED-NEXT: fmov d4, x8 ; NEON-FIXED-NEXT: str q0, [sp] ; NEON-FIXED-NEXT: xtn v3.2s, v1.2d ; NEON-FIXED-NEXT: umaxv s1, v1.4s |
