Diffstat (limited to 'llvm/test/CodeGen/AArch64')
19 files changed, 2893 insertions, 315 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 0f75887..c68ae92 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1734,8 +1734,7 @@ define float @test_different_call_conv_target(float %x) { define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) { ; CHECK-LABEL: name: test_shufflevector_s32_v2s32 ; CHECK: [[ARG:%[0-9]+]]:_(s32) = COPY $w0 -; CHECK-DAG: [[UNDEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF -; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[ARG]](s32), [[UNDEF]], shufflemask(0, 0) +; CHECK: [[VEC:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ARG]](s32), [[ARG]](s32) ; CHECK: $d0 = COPY [[VEC]](<2 x s32>) %vec = insertelement <1 x i32> undef, i32 %arg, i32 0 %res = shufflevector <1 x i32> %vec, <1 x i32> undef, <2 x i32> zeroinitializer @@ -1745,7 +1744,8 @@ define <2 x i32> @test_shufflevector_s32_v2s32(i32 %arg) { define i32 @test_shufflevector_v2s32_s32(<2 x i32> %arg) { ; CHECK-LABEL: name: test_shufflevector_v2s32_s32 ; CHECK: [[ARG:%[0-9]+]]:_(<2 x s32>) = COPY $d0 -; CHECK: [[RES:%[0-9]+]]:_(s32) = G_SHUFFLE_VECTOR [[ARG]](<2 x s32>), [[UNDEF]], shufflemask(1) +; CHECK: [[IDX:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 +; CHECK: [[RES:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[ARG]](<2 x s32>), [[IDX]](s64) ; CHECK: $w0 = COPY [[RES]](s32) %vec = shufflevector <2 x i32> %arg, <2 x i32> undef, <1 x i32> <i32 1> %res = extractelement <1 x i32> %vec, i32 0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir index 2e70252..fdd0ebb 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector.mir @@ -119,33 +119,6 @@ body: | ... --- -name: shuffle_1elt_mask -alignment: 4 -tracksRegLiveness: true -body: | - bb.1: - liveins: $d0, $d1 - - ; CHECK-LABEL: name: shuffle_1elt_mask - ; CHECK: liveins: $d0, $d1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY1]](s64) - ; CHECK-NEXT: $d0 = COPY [[COPY2]](s64) - ; CHECK-NEXT: $d1 = COPY [[COPY3]](s64) - ; CHECK-NEXT: RET_ReallyLR implicit $d0, implicit $d1 - %0:_(s64) = COPY $d0 - %1:_(s64) = COPY $d1 - %3:_(s64) = G_SHUFFLE_VECTOR %0:_(s64), %1:_, shufflemask(0) - %4:_(s64) = G_SHUFFLE_VECTOR %0:_(s64), %1:_, shufflemask(1) - $d0 = COPY %3(s64) - $d1 = COPY %4(s64) - RET_ReallyLR implicit $d0, implicit $d1 - -... 
---- name: oversize_shuffle_v4i64 alignment: 4 tracksRegLiveness: true @@ -641,7 +614,8 @@ body: | %0:_(<8 x s1>) = G_TRUNC %2:_(<8 x s8>) %3:_(<8 x s8>) = COPY $d1 %1:_(<8 x s1>) = G_TRUNC %3:_(<8 x s8>) - %4:_(s1) = G_SHUFFLE_VECTOR %0:_(<8 x s1>), %1:_, shufflemask(12) + %7:_(s64) = G_CONSTANT i64 4 + %4:_(s1) = G_EXTRACT_VECTOR_ELT %1:_(<8 x s1>), %7(s64) %5:_(s8) = G_ZEXT %4:_(s1) %6:_(s32) = G_ANYEXT %5:_(s8) $w0 = COPY %6:_(s32) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir index 9261d7a..914dfa0 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-disjoint-mask.mir @@ -80,22 +80,3 @@ body: | %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1,0,1,-1) RET_ReallyLR implicit %2 ... - ---- -name: shuffle_vector_scalar -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: shuffle_vector_scalar - ; CHECK: liveins: $x0, $x1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY]](s64), [[COPY]](s64), [[COPY]](s64) - ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0, 0, 0, 0) - RET_ReallyLR implicit %2 -... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir index 9bf7993..7a314ba 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector-undef-rhs.mir @@ -20,23 +20,3 @@ body: | %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1(<2 x s32>), shufflemask(0, 2, 1, 3) RET_ReallyLR implicit %2 ... - ---- -name: shuffle_vector_undef_rhs_scalar -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0 - - ; CHECK-LABEL: name: shuffle_vector_undef_rhs_scalar - ; CHECK: liveins: $x0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64) - ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x s64>) - %0:_(s64) = COPY $x0 - %1:_(s64) = G_IMPLICIT_DEF - %2:_(<2 x s64>) = G_SHUFFLE_VECTOR %0(s64), %1(s64), shufflemask(0, 1) - RET_ReallyLR implicit %2 -... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir index 2c9ae5b..9013410 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir @@ -386,7 +386,7 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x p0>) %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %6:_(<4 x p0>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,1,0,1) + %6:_(<4 x p0>) = G_BUILD_VECTOR %0, %1, %0, %1 RET_ReallyLR implicit %6 ... 
@@ -408,104 +408,6 @@ body: | ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<2 x p0>) %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 - %6:_(<2 x p0>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1,0) - RET_ReallyLR implicit %6 -... - -# Check that we properly use undef values when shuffle_vector -# on scalars gets lowered to build_vector. ---- -name: shuffle_vector_on_scalars_to_build_vector_with_undef -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: shuffle_vector_on_scalars_to_build_vector_with_undef - ; CHECK: liveins: $x0, $x1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[DEF]](s64), [[DEF]](s64), [[COPY1]](s64) - ; CHECK-NEXT: RET_ReallyLR implicit [[BUILD_VECTOR]](<4 x s64>) - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %6:_(<4 x s64>) = G_SHUFFLE_VECTOR %0, %1, shufflemask(0,-1,-1,1) - RET_ReallyLR implicit %6 -... - -# Check that shuffle_vector on scalars gets combined into a plain -# copy when the resulting type is a scalar as well and the sizes -# are compatible. ---- -name: shuffle_vector_on_scalars_to_copy_ptr -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0 - - ; CHECK-LABEL: name: shuffle_vector_on_scalars_to_copy_ptr - ; CHECK: liveins: $x0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: RET_ReallyLR implicit [[COPY]](p0) - %0:_(p0) = COPY $x0 - %6:_(p0) = G_SHUFFLE_VECTOR %0, %0, shufflemask(0) - RET_ReallyLR implicit %6 -... ---- -name: shuffle_vector_to_copy_lhs -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: shuffle_vector_to_copy_lhs - ; CHECK: liveins: $x0, $x1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) - ; CHECK-NEXT: RET_ReallyLR implicit [[EVEC]](s32) - %0:_(<2 x s32>) = COPY $x0 - %1:_(<2 x s32>) = COPY $x1 - %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(1) - RET_ReallyLR implicit %6 -... ---- -name: shuffle_vector_to_copy_rhs -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: shuffle_vector_to_copy_rhs - ; CHECK: liveins: $x0, $x1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $x1 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) - ; CHECK-NEXT: RET_ReallyLR implicit [[EVEC]](s32) - %0:_(<2 x s32>) = COPY $x0 - %1:_(<2 x s32>) = COPY $x1 - %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(2) - RET_ReallyLR implicit %6 -... ---- -name: shuffle_vector_to_copy_undef -tracksRegLiveness: true -body: | - bb.1: - liveins: $x0, $x1 - - ; CHECK-LABEL: name: shuffle_vector_to_copy_undef - ; CHECK: liveins: $x0, $x1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK-NEXT: RET_ReallyLR implicit [[DEF]](s32) - %0:_(<2 x s32>) = COPY $x0 - %1:_(<2 x s32>) = COPY $x1 - %6:_(s32) = G_SHUFFLE_VECTOR %0, %1, shufflemask(-1) + %6:_(<2 x p0>) = G_BUILD_VECTOR %1, %0 RET_ReallyLR implicit %6 ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir index b252884..46dbc15 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-fp-use-def.mir @@ -96,7 +96,7 @@ body: | ; CHECK-NEXT: [[SITOFP:%[0-9]+]]:fpr(s32) = G_SITOFP [[COPY1]](s32) ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr(s32) = COPY [[COPY2]](s32) ; CHECK-NEXT: [[SELECT:%[0-9]+]]:fpr(s32) = G_SELECT [[COPY2]](s32), [[COPY3]], [[SITOFP]] - ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:gpr(s32) = G_FPTOSI [[SELECT]](s32) + ; CHECK-NEXT: [[FPTOSI:%[0-9]+]]:fpr(s32) = G_FPTOSI [[SELECT]](s32) %0:_(s32) = COPY $w0 %2:_(s32) = COPY $w1 %3:_(s32) = COPY $w2 diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll index 0933e67..b54f262 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -749,12 +749,429 @@ for.body: ; preds = %for.body.preheader1 br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } +define i64 @red_mla_dup_ext_u8_s8_s64(ptr noalias noundef readonly captures(none) %A, i8 noundef %B, i32 noundef %n) { +; CHECK-SD-LABEL: red_mla_dup_ext_u8_s8_s64: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-SD-NEXT: cbz w2, .LBB6_3 +; CHECK-SD-NEXT: // %bb.1: // %iter.check +; CHECK-SD-NEXT: str x25, [sp, #-64]! // 8-byte Folded Spill +; CHECK-SD-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w20, -16 +; CHECK-SD-NEXT: .cfi_offset w21, -24 +; CHECK-SD-NEXT: .cfi_offset w22, -32 +; CHECK-SD-NEXT: .cfi_offset w23, -40 +; CHECK-SD-NEXT: .cfi_offset w24, -48 +; CHECK-SD-NEXT: .cfi_offset w25, -64 +; CHECK-SD-NEXT: sxtb x9, w1 +; CHECK-SD-NEXT: cmp w2, #3 +; CHECK-SD-NEXT: mov w10, w2 +; CHECK-SD-NEXT: b.hi .LBB6_4 +; CHECK-SD-NEXT: // %bb.2: +; CHECK-SD-NEXT: mov x11, xzr +; CHECK-SD-NEXT: mov x8, xzr +; CHECK-SD-NEXT: b .LBB6_13 +; CHECK-SD-NEXT: .LBB6_3: +; CHECK-SD-NEXT: mov x0, xzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB6_4: // %vector.main.loop.iter.check +; CHECK-SD-NEXT: dup v0.2d, x9 +; CHECK-SD-NEXT: cmp w2, #16 +; CHECK-SD-NEXT: b.hs .LBB6_6 +; CHECK-SD-NEXT: // %bb.5: +; CHECK-SD-NEXT: mov x11, xzr +; CHECK-SD-NEXT: mov x8, xzr +; CHECK-SD-NEXT: b .LBB6_10 +; CHECK-SD-NEXT: .LBB6_6: // %vector.ph +; CHECK-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-SD-NEXT: mov x8, v0.d[1] +; CHECK-SD-NEXT: and x12, x10, #0xc +; CHECK-SD-NEXT: movi v2.2d, #0000000000000000 +; CHECK-SD-NEXT: movi v4.2d, #0000000000000000 +; CHECK-SD-NEXT: and x11, x10, #0xfffffff0 +; CHECK-SD-NEXT: movi v3.2d, #0000000000000000 +; CHECK-SD-NEXT: movi v7.2d, #0000000000000000 +; CHECK-SD-NEXT: mov x15, x0 +; CHECK-SD-NEXT: movi v5.2d, #0000000000000000 +; CHECK-SD-NEXT: movi v16.2d, #0000000000000000 +; CHECK-SD-NEXT: and x16, x10, #0xfffffff0 +; CHECK-SD-NEXT: movi v6.2d, #0000000000000000 +; CHECK-SD-NEXT: fmov x13, d0 +; CHECK-SD-NEXT: fmov x14, d0 +; CHECK-SD-NEXT: .LBB6_7: // %vector.body +; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-SD-NEXT: ldr q17, [x15], #16 +; CHECK-SD-NEXT: subs x16, x16, #16 +; CHECK-SD-NEXT: ushll v18.8h, v17.8b, #0 +; 
CHECK-SD-NEXT: ushll2 v19.8h, v17.16b, #0 +; CHECK-SD-NEXT: ushll v17.4s, v18.4h, #0 +; CHECK-SD-NEXT: ushll2 v20.4s, v19.8h, #0 +; CHECK-SD-NEXT: ushll2 v18.4s, v18.8h, #0 +; CHECK-SD-NEXT: ushll v19.4s, v19.4h, #0 +; CHECK-SD-NEXT: ushll v21.2d, v17.2s, #0 +; CHECK-SD-NEXT: ushll2 v22.2d, v20.4s, #0 +; CHECK-SD-NEXT: ushll2 v17.2d, v17.4s, #0 +; CHECK-SD-NEXT: ushll v23.2d, v18.2s, #0 +; CHECK-SD-NEXT: ushll v20.2d, v20.2s, #0 +; CHECK-SD-NEXT: ushll2 v18.2d, v18.4s, #0 +; CHECK-SD-NEXT: fmov x17, d21 +; CHECK-SD-NEXT: mov x2, v21.d[1] +; CHECK-SD-NEXT: ushll v21.2d, v19.2s, #0 +; CHECK-SD-NEXT: ushll2 v19.2d, v19.4s, #0 +; CHECK-SD-NEXT: fmov x18, d22 +; CHECK-SD-NEXT: fmov x1, d17 +; CHECK-SD-NEXT: fmov x3, d23 +; CHECK-SD-NEXT: fmov x21, d20 +; CHECK-SD-NEXT: fmov x22, d18 +; CHECK-SD-NEXT: fmov x19, d21 +; CHECK-SD-NEXT: mul x17, x13, x17 +; CHECK-SD-NEXT: mov x4, v22.d[1] +; CHECK-SD-NEXT: fmov x24, d19 +; CHECK-SD-NEXT: mov x5, v23.d[1] +; CHECK-SD-NEXT: mov x6, v21.d[1] +; CHECK-SD-NEXT: mov x7, v20.d[1] +; CHECK-SD-NEXT: mov x20, v18.d[1] +; CHECK-SD-NEXT: mov x23, v19.d[1] +; CHECK-SD-NEXT: mov x25, v17.d[1] +; CHECK-SD-NEXT: mul x18, x14, x18 +; CHECK-SD-NEXT: mul x1, x13, x1 +; CHECK-SD-NEXT: fmov d17, x17 +; CHECK-SD-NEXT: mul x3, x13, x3 +; CHECK-SD-NEXT: fmov d18, x18 +; CHECK-SD-NEXT: mul x19, x13, x19 +; CHECK-SD-NEXT: fmov d19, x1 +; CHECK-SD-NEXT: mul x21, x13, x21 +; CHECK-SD-NEXT: fmov d20, x3 +; CHECK-SD-NEXT: mul x22, x13, x22 +; CHECK-SD-NEXT: fmov d21, x19 +; CHECK-SD-NEXT: mul x24, x13, x24 +; CHECK-SD-NEXT: fmov d24, x21 +; CHECK-SD-NEXT: mul x2, x8, x2 +; CHECK-SD-NEXT: fmov d22, x22 +; CHECK-SD-NEXT: mul x4, x8, x4 +; CHECK-SD-NEXT: fmov d23, x24 +; CHECK-SD-NEXT: mul x5, x8, x5 +; CHECK-SD-NEXT: mov v17.d[1], x2 +; CHECK-SD-NEXT: mul x6, x8, x6 +; CHECK-SD-NEXT: mov v18.d[1], x4 +; CHECK-SD-NEXT: mul x7, x8, x7 +; CHECK-SD-NEXT: mov v20.d[1], x5 +; CHECK-SD-NEXT: add v1.2d, v17.2d, v1.2d +; CHECK-SD-NEXT: mul x20, x8, x20 +; CHECK-SD-NEXT: mov v21.d[1], x6 +; CHECK-SD-NEXT: add v6.2d, v18.2d, v6.2d +; CHECK-SD-NEXT: mul x23, x8, x23 +; CHECK-SD-NEXT: mov v24.d[1], x7 +; CHECK-SD-NEXT: add v4.2d, v20.2d, v4.2d +; CHECK-SD-NEXT: mul x17, x8, x25 +; CHECK-SD-NEXT: mov v22.d[1], x20 +; CHECK-SD-NEXT: add v7.2d, v21.2d, v7.2d +; CHECK-SD-NEXT: mov v23.d[1], x23 +; CHECK-SD-NEXT: add v16.2d, v24.2d, v16.2d +; CHECK-SD-NEXT: mov v19.d[1], x17 +; CHECK-SD-NEXT: add v3.2d, v22.2d, v3.2d +; CHECK-SD-NEXT: add v5.2d, v23.2d, v5.2d +; CHECK-SD-NEXT: add v2.2d, v19.2d, v2.2d +; CHECK-SD-NEXT: b.ne .LBB6_7 +; CHECK-SD-NEXT: // %bb.8: // %middle.block +; CHECK-SD-NEXT: add v1.2d, v1.2d, v7.2d +; CHECK-SD-NEXT: add v4.2d, v4.2d, v16.2d +; CHECK-SD-NEXT: cmp x11, x10 +; CHECK-SD-NEXT: add v2.2d, v2.2d, v5.2d +; CHECK-SD-NEXT: add v3.2d, v3.2d, v6.2d +; CHECK-SD-NEXT: add v1.2d, v1.2d, v4.2d +; CHECK-SD-NEXT: add v2.2d, v2.2d, v3.2d +; CHECK-SD-NEXT: add v1.2d, v1.2d, v2.2d +; CHECK-SD-NEXT: addp d1, v1.2d +; CHECK-SD-NEXT: fmov x8, d1 +; CHECK-SD-NEXT: b.eq .LBB6_15 +; CHECK-SD-NEXT: // %bb.9: // %vec.epilog.iter.check +; CHECK-SD-NEXT: cbz x12, .LBB6_13 +; CHECK-SD-NEXT: .LBB6_10: // %vec.epilog.ph +; CHECK-SD-NEXT: movi v1.2d, #0000000000000000 +; CHECK-SD-NEXT: movi v2.2d, #0000000000000000 +; CHECK-SD-NEXT: mov x13, x11 +; CHECK-SD-NEXT: movi v3.2d, #0x000000000000ff +; CHECK-SD-NEXT: fmov x14, d0 +; CHECK-SD-NEXT: and x11, x10, #0xfffffffc +; CHECK-SD-NEXT: fmov x15, d0 +; CHECK-SD-NEXT: sub x12, x13, x11 +; CHECK-SD-NEXT: add x13, x0, x13 +; CHECK-SD-NEXT: mov v1.d[0], 
x8 +; CHECK-SD-NEXT: mov x8, v0.d[1] +; CHECK-SD-NEXT: .LBB6_11: // %vec.epilog.vector.body +; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-SD-NEXT: ldr s0, [x13], #4 +; CHECK-SD-NEXT: adds x12, x12, #4 +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-SD-NEXT: ushll v4.2d, v0.2s, #0 +; CHECK-SD-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-SD-NEXT: and v4.16b, v4.16b, v3.16b +; CHECK-SD-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-SD-NEXT: fmov x16, d4 +; CHECK-SD-NEXT: fmov x18, d0 +; CHECK-SD-NEXT: mov x17, v4.d[1] +; CHECK-SD-NEXT: mov x1, v0.d[1] +; CHECK-SD-NEXT: mul x16, x14, x16 +; CHECK-SD-NEXT: mul x18, x15, x18 +; CHECK-SD-NEXT: mul x17, x8, x17 +; CHECK-SD-NEXT: fmov d0, x16 +; CHECK-SD-NEXT: mul x1, x8, x1 +; CHECK-SD-NEXT: fmov d4, x18 +; CHECK-SD-NEXT: mov v0.d[1], x17 +; CHECK-SD-NEXT: mov v4.d[1], x1 +; CHECK-SD-NEXT: add v1.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: add v2.2d, v4.2d, v2.2d +; CHECK-SD-NEXT: b.ne .LBB6_11 +; CHECK-SD-NEXT: // %bb.12: // %vec.epilog.middle.block +; CHECK-SD-NEXT: add v0.2d, v1.2d, v2.2d +; CHECK-SD-NEXT: cmp x11, x10 +; CHECK-SD-NEXT: addp d0, v0.2d +; CHECK-SD-NEXT: fmov x8, d0 +; CHECK-SD-NEXT: b.eq .LBB6_15 +; CHECK-SD-NEXT: .LBB6_13: // %for.body.preheader +; CHECK-SD-NEXT: sub x10, x10, x11 +; CHECK-SD-NEXT: add x11, x0, x11 +; CHECK-SD-NEXT: .LBB6_14: // %for.body +; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-SD-NEXT: ldrb w12, [x11], #1 +; CHECK-SD-NEXT: subs x10, x10, #1 +; CHECK-SD-NEXT: smaddl x8, w12, w9, x8 +; CHECK-SD-NEXT: b.ne .LBB6_14 +; CHECK-SD-NEXT: .LBB6_15: +; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldr x25, [sp], #64 // 8-byte Folded Reload +; CHECK-SD-NEXT: mov x0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: red_mla_dup_ext_u8_s8_s64: +; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-GI-NEXT: cbz w2, .LBB6_7 +; CHECK-GI-NEXT: // %bb.1: // %iter.check +; CHECK-GI-NEXT: movi d0, #0000000000000000 +; CHECK-GI-NEXT: sxtb x9, w1 +; CHECK-GI-NEXT: mov x11, xzr +; CHECK-GI-NEXT: cmp w2, #4 +; CHECK-GI-NEXT: mov w10, w2 +; CHECK-GI-NEXT: b.lo .LBB6_12 +; CHECK-GI-NEXT: // %bb.2: // %vector.main.loop.iter.check +; CHECK-GI-NEXT: movi d0, #0000000000000000 +; CHECK-GI-NEXT: dup v1.2d, x9 +; CHECK-GI-NEXT: mov x11, xzr +; CHECK-GI-NEXT: cmp w2, #16 +; CHECK-GI-NEXT: b.lo .LBB6_9 +; CHECK-GI-NEXT: // %bb.3: // %vector.ph +; CHECK-GI-NEXT: movi v0.2d, #0000000000000000 +; CHECK-GI-NEXT: xtn v2.2s, v1.2d +; CHECK-GI-NEXT: and x8, x10, #0xc +; CHECK-GI-NEXT: movi v3.2d, #0000000000000000 +; CHECK-GI-NEXT: movi v4.2d, #0000000000000000 +; CHECK-GI-NEXT: and x11, x10, #0xfffffff0 +; CHECK-GI-NEXT: movi v5.2d, #0000000000000000 +; CHECK-GI-NEXT: movi v6.2d, #0000000000000000 +; CHECK-GI-NEXT: mov x12, x0 +; CHECK-GI-NEXT: movi v7.2d, #0000000000000000 +; CHECK-GI-NEXT: movi v16.2d, #0000000000000000 +; CHECK-GI-NEXT: and x13, x10, #0xfffffff0 +; CHECK-GI-NEXT: movi v17.2d, #0000000000000000 +; CHECK-GI-NEXT: .LBB6_4: // %vector.body +; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-GI-NEXT: ldr q18, [x12], #16 +; CHECK-GI-NEXT: subs x13, x13, #16 +; CHECK-GI-NEXT: ushll v19.8h, v18.8b, #0 +; CHECK-GI-NEXT: ushll2 v18.8h, v18.16b, #0 +; CHECK-GI-NEXT: ushll v20.4s, v19.4h, #0 +; CHECK-GI-NEXT: ushll2 v19.4s, v19.8h, #0 +; CHECK-GI-NEXT: ushll 
v21.4s, v18.4h, #0 +; CHECK-GI-NEXT: ushll2 v18.4s, v18.8h, #0 +; CHECK-GI-NEXT: mov d22, v20.d[1] +; CHECK-GI-NEXT: mov d23, v19.d[1] +; CHECK-GI-NEXT: mov d24, v21.d[1] +; CHECK-GI-NEXT: mov d25, v18.d[1] +; CHECK-GI-NEXT: smlal v0.2d, v2.2s, v20.2s +; CHECK-GI-NEXT: smlal v4.2d, v2.2s, v19.2s +; CHECK-GI-NEXT: smlal v6.2d, v2.2s, v21.2s +; CHECK-GI-NEXT: smlal v16.2d, v2.2s, v18.2s +; CHECK-GI-NEXT: smlal v3.2d, v2.2s, v22.2s +; CHECK-GI-NEXT: smlal v5.2d, v2.2s, v23.2s +; CHECK-GI-NEXT: smlal v7.2d, v2.2s, v24.2s +; CHECK-GI-NEXT: smlal v17.2d, v2.2s, v25.2s +; CHECK-GI-NEXT: b.ne .LBB6_4 +; CHECK-GI-NEXT: // %bb.5: // %middle.block +; CHECK-GI-NEXT: add v0.2d, v0.2d, v3.2d +; CHECK-GI-NEXT: add v2.2d, v4.2d, v5.2d +; CHECK-GI-NEXT: cmp x11, x10 +; CHECK-GI-NEXT: add v3.2d, v6.2d, v7.2d +; CHECK-GI-NEXT: add v4.2d, v16.2d, v17.2d +; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: add v2.2d, v3.2d, v4.2d +; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: addp d0, v0.2d +; CHECK-GI-NEXT: b.ne .LBB6_8 +; CHECK-GI-NEXT: // %bb.6: +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: mov x0, x8 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB6_7: +; CHECK-GI-NEXT: mov x8, xzr +; CHECK-GI-NEXT: mov x0, x8 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: .LBB6_8: // %vec.epilog.iter.check +; CHECK-GI-NEXT: cbz x8, .LBB6_12 +; CHECK-GI-NEXT: .LBB6_9: // %vec.epilog.ph +; CHECK-GI-NEXT: mov v0.d[1], xzr +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: mov x12, x11 +; CHECK-GI-NEXT: xtn v1.2s, v1.2d +; CHECK-GI-NEXT: and x11, x10, #0xfffffffc +; CHECK-GI-NEXT: sub x8, x12, x11 +; CHECK-GI-NEXT: add x12, x0, x12 +; CHECK-GI-NEXT: .LBB6_10: // %vec.epilog.vector.body +; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-GI-NEXT: ldr w13, [x12], #4 +; CHECK-GI-NEXT: adds x8, x8, #4 +; CHECK-GI-NEXT: fmov s3, w13 +; CHECK-GI-NEXT: uxtb w13, w13 +; CHECK-GI-NEXT: mov b4, v3.b[2] +; CHECK-GI-NEXT: mov b5, v3.b[1] +; CHECK-GI-NEXT: mov b6, v3.b[3] +; CHECK-GI-NEXT: fmov s3, w13 +; CHECK-GI-NEXT: fmov w14, s4 +; CHECK-GI-NEXT: fmov w15, s5 +; CHECK-GI-NEXT: fmov w16, s6 +; CHECK-GI-NEXT: uxtb w14, w14 +; CHECK-GI-NEXT: uxtb w15, w15 +; CHECK-GI-NEXT: uxtb w16, w16 +; CHECK-GI-NEXT: fmov s4, w14 +; CHECK-GI-NEXT: mov v3.s[1], w15 +; CHECK-GI-NEXT: mov v4.s[1], w16 +; CHECK-GI-NEXT: smlal v0.2d, v1.2s, v3.2s +; CHECK-GI-NEXT: smlal v2.2d, v1.2s, v4.2s +; CHECK-GI-NEXT: b.ne .LBB6_10 +; CHECK-GI-NEXT: // %bb.11: // %vec.epilog.middle.block +; CHECK-GI-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: cmp x11, x10 +; CHECK-GI-NEXT: addp d0, v0.2d +; CHECK-GI-NEXT: fmov x8, d0 +; CHECK-GI-NEXT: b.eq .LBB6_14 +; CHECK-GI-NEXT: .LBB6_12: // %for.body.preheader +; CHECK-GI-NEXT: sub x10, x10, x11 +; CHECK-GI-NEXT: add x11, x0, x11 +; CHECK-GI-NEXT: .LBB6_13: // %for.body +; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-GI-NEXT: ldrb w8, [x11], #1 +; CHECK-GI-NEXT: fmov x12, d0 +; CHECK-GI-NEXT: subs x10, x10, #1 +; CHECK-GI-NEXT: madd x8, x8, x9, x12 +; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: b.ne .LBB6_13 +; CHECK-GI-NEXT: .LBB6_14: // %for.cond.cleanup +; CHECK-GI-NEXT: mov x0, x8 +; CHECK-GI-NEXT: ret +entry: + %cmp5.not = icmp eq i32 %n, 0 + br i1 %cmp5.not, label %for.cond.cleanup, label %iter.check + +iter.check: ; preds = %entry + %conv1 = sext i8 %B to i64 + %wide.trip.count = zext i32 %n to i64 + %min.iters.check = icmp ult i32 %n, 4 + br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check + +vector.main.loop.iter.check: ; preds 
= %iter.check + %min.iters.check9 = icmp ult i32 %n, 16 + br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph + +vector.ph: ; preds = %vector.main.loop.iter.check + %n.mod.vf = and i64 %wide.trip.count, 12 + %n.vec = and i64 %wide.trip.count, 4294967280 + %broadcast.splatinsert = insertelement <16 x i64> poison, i64 %conv1, i64 0 + %broadcast.splat = shufflevector <16 x i64> %broadcast.splatinsert, <16 x i64> poison, <16 x i32> zeroinitializer + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.phi = phi <16 x i64> [ zeroinitializer, %vector.ph ], [ %3, %vector.body ] + %0 = getelementptr inbounds nuw i8, ptr %A, i64 %index + %wide.load = load <16 x i8>, ptr %0, align 1 + %1 = zext <16 x i8> %wide.load to <16 x i64> + %2 = mul nsw <16 x i64> %broadcast.splat, %1 + %3 = add <16 x i64> %2, %vec.phi + %index.next = add nuw i64 %index, 16 + %4 = icmp eq i64 %index.next, %n.vec + br i1 %4, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %5 = tail call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %3) + %cmp.n = icmp eq i64 %n.vec, %wide.trip.count + br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check + +vec.epilog.iter.check: ; preds = %middle.block + %min.epilog.iters.check = icmp eq i64 %n.mod.vf, 0 + br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph + +vec.epilog.ph: ; preds = %vector.main.loop.iter.check, %vec.epilog.iter.check + %vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] + %bc.merge.rdx = phi i64 [ %5, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] + %n.vec11 = and i64 %wide.trip.count, 4294967292 + %6 = insertelement <4 x i64> <i64 poison, i64 0, i64 0, i64 0>, i64 %bc.merge.rdx, i64 0 + %broadcast.splatinsert12 = insertelement <4 x i64> poison, i64 %conv1, i64 0 + %broadcast.splat13 = shufflevector <4 x i64> %broadcast.splatinsert12, <4 x i64> poison, <4 x i32> zeroinitializer + br label %vec.epilog.vector.body + +vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph + %index14 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next17, %vec.epilog.vector.body ] + %vec.phi15 = phi <4 x i64> [ %6, %vec.epilog.ph ], [ %10, %vec.epilog.vector.body ] + %7 = getelementptr inbounds nuw i8, ptr %A, i64 %index14 + %wide.load16 = load <4 x i8>, ptr %7, align 1 + %8 = zext <4 x i8> %wide.load16 to <4 x i64> + %9 = mul nsw <4 x i64> %broadcast.splat13, %8 + %10 = add <4 x i64> %9, %vec.phi15 + %index.next17 = add nuw i64 %index14, 4 + %11 = icmp eq i64 %index.next17, %n.vec11 + br i1 %11, label %vec.epilog.middle.block, label %vec.epilog.vector.body + +vec.epilog.middle.block: ; preds = %vec.epilog.vector.body + %12 = tail call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %10) + %cmp.n18 = icmp eq i64 %n.vec11, %wide.trip.count + br i1 %cmp.n18, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %iter.check, %vec.epilog.iter.check, %vec.epilog.middle.block + %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec11, %vec.epilog.middle.block ] + %s.06.ph = phi i64 [ 0, %iter.check ], [ %5, %vec.epilog.iter.check ], [ %12, %vec.epilog.middle.block ] + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry + %s.0.lcssa = phi i64 [ 0, %entry ], [ %5, %middle.block ], [ %12, 
%vec.epilog.middle.block ], [ %add, %for.body ] + ret i64 %s.0.lcssa + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] + %s.06 = phi i64 [ %add, %for.body ], [ %s.06.ph, %for.body.preheader ] + %arrayidx = getelementptr inbounds nuw i8, ptr %A, i64 %indvars.iv + %13 = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %13 to i64 + %mul = mul nsw i64 %conv, %conv1 + %add = add nsw i64 %mul, %s.06 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-SD-LABEL: sink_v2z64_1: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov x8, xzr ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: .LBB6_1: // %loop +; CHECK-SD-NEXT: .LBB7_1: // %loop ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: subs x2, x2, #8 @@ -762,7 +1179,7 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-SD-NEXT: umull v1.2d, v1.2s, v0.s[1] ; CHECK-SD-NEXT: shrn v1.2s, v1.2d, #15 ; CHECK-SD-NEXT: str d1, [x0], #32 -; CHECK-SD-NEXT: b.ne .LBB6_1 +; CHECK-SD-NEXT: b.ne .LBB7_1 ; CHECK-SD-NEXT: // %bb.2: // %exit ; CHECK-SD-NEXT: ret ; @@ -772,7 +1189,7 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-GI-NEXT: mov x8, xzr ; CHECK-GI-NEXT: dup v0.2d, v0.d[1] ; CHECK-GI-NEXT: xtn v0.2s, v0.2d -; CHECK-GI-NEXT: .LBB6_1: // %loop +; CHECK-GI-NEXT: .LBB7_1: // %loop ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: subs x2, x2, #8 @@ -780,7 +1197,7 @@ define void @sink_v2z64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-GI-NEXT: umull v1.2d, v1.2s, v0.2s ; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15 ; CHECK-GI-NEXT: str d1, [x0], #32 -; CHECK-GI-NEXT: b.ne .LBB6_1 +; CHECK-GI-NEXT: b.ne .LBB7_1 ; CHECK-GI-NEXT: // %bb.2: // %exit ; CHECK-GI-NEXT: ret entry: @@ -813,7 +1230,7 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: mov x8, xzr ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-SD-NEXT: .LBB7_1: // %loop +; CHECK-SD-NEXT: .LBB8_1: // %loop ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: subs x2, x2, #8 @@ -823,7 +1240,7 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-SD-NEXT: shrn v2.2s, v2.2d, #15 ; CHECK-SD-NEXT: shrn2 v2.4s, v1.2d, #15 ; CHECK-SD-NEXT: str q2, [x0], #32 -; CHECK-SD-NEXT: b.ne .LBB7_1 +; CHECK-SD-NEXT: b.ne .LBB8_1 ; CHECK-SD-NEXT: // %bb.2: // %exit ; CHECK-SD-NEXT: ret ; @@ -833,7 +1250,7 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-GI-NEXT: mov x8, xzr ; CHECK-GI-NEXT: dup v0.2d, v0.d[1] ; CHECK-GI-NEXT: xtn v0.2s, v0.2d -; CHECK-GI-NEXT: .LBB7_1: // %loop +; CHECK-GI-NEXT: .LBB8_1: // %loop ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: subs x2, x2, #8 @@ -844,7 +1261,7 @@ define void @sink_v4i64_1(ptr %p, ptr %d, i64 %n, <2 x i32> %a) { ; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #15 ; CHECK-GI-NEXT: shrn2 v1.4s, v2.2d, #15 ; CHECK-GI-NEXT: str q1, [x0], #32 -; CHECK-GI-NEXT: b.ne .LBB7_1 +; CHECK-GI-NEXT: b.ne .LBB8_1 ; CHECK-GI-NEXT: // %bb.2: // %exit ; CHECK-GI-NEXT: ret entry: @@ -877,7 
+1294,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: dup v0.8b, v0.b[0] ; CHECK-SD-NEXT: mov x8, xzr -; CHECK-SD-NEXT: .LBB8_1: // %loop +; CHECK-SD-NEXT: .LBB9_1: // %loop ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: ldr d1, [x0] ; CHECK-SD-NEXT: subs x2, x2, #8 @@ -886,7 +1303,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-SD-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-SD-NEXT: xtn v1.8b, v1.8h ; CHECK-SD-NEXT: str d1, [x0], #32 -; CHECK-SD-NEXT: b.ne .LBB8_1 +; CHECK-SD-NEXT: b.ne .LBB9_1 ; CHECK-SD-NEXT: // %bb.2: // %exit ; CHECK-SD-NEXT: ret ; @@ -896,7 +1313,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: mov x8, xzr ; CHECK-GI-NEXT: dup v0.8h, v0.h[0] ; CHECK-GI-NEXT: xtn v0.8b, v0.8h -; CHECK-GI-NEXT: .LBB8_1: // %loop +; CHECK-GI-NEXT: .LBB9_1: // %loop ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldr d1, [x0] ; CHECK-GI-NEXT: subs x2, x2, #8 @@ -905,7 +1322,7 @@ define void @sink_v8z16_0(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-GI-NEXT: xtn v1.8b, v1.8h ; CHECK-GI-NEXT: str d1, [x0], #32 -; CHECK-GI-NEXT: b.ne .LBB8_1 +; CHECK-GI-NEXT: b.ne .LBB9_1 ; CHECK-GI-NEXT: // %bb.2: // %exit ; CHECK-GI-NEXT: ret entry: @@ -938,7 +1355,7 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: dup v0.16b, v0.b[10] ; CHECK-SD-NEXT: mov x8, xzr -; CHECK-SD-NEXT: .LBB9_1: // %loop +; CHECK-SD-NEXT: .LBB10_1: // %loop ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: ldr q1, [x0] ; CHECK-SD-NEXT: subs x2, x2, #8 @@ -949,7 +1366,7 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-SD-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-SD-NEXT: uzp1 v1.16b, v2.16b, v1.16b ; CHECK-SD-NEXT: str q1, [x0], #32 -; CHECK-SD-NEXT: b.ne .LBB9_1 +; CHECK-SD-NEXT: b.ne .LBB10_1 ; CHECK-SD-NEXT: // %bb.2: // %exit ; CHECK-SD-NEXT: ret ; @@ -959,7 +1376,7 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: mov x8, xzr ; CHECK-GI-NEXT: dup v0.8h, v0.h[2] ; CHECK-GI-NEXT: xtn v0.8b, v0.8h -; CHECK-GI-NEXT: .LBB9_1: // %loop +; CHECK-GI-NEXT: .LBB10_1: // %loop ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldr q1, [x0] ; CHECK-GI-NEXT: subs x2, x2, #8 @@ -971,7 +1388,7 @@ define void @sink_v16s16_8(ptr %p, ptr %d, i64 %n, <16 x i8> %a) { ; CHECK-GI-NEXT: cmlt v2.8h, v2.8h, #0 ; CHECK-GI-NEXT: uzp1 v1.16b, v1.16b, v2.16b ; CHECK-GI-NEXT: str q1, [x0], #32 -; CHECK-GI-NEXT: b.ne .LBB9_1 +; CHECK-GI-NEXT: b.ne .LBB10_1 ; CHECK-GI-NEXT: // %bb.2: // %exit ; CHECK-GI-NEXT: ret entry: @@ -1005,7 +1422,7 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea ; CHECK-SD-NEXT: dup v0.4h, w3 ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: and x8, x0, #0xfffffff8 -; CHECK-SD-NEXT: .LBB10_1: // %vector.body +; CHECK-SD-NEXT: .LBB11_1: // %vector.body ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-SD-NEXT: subs x8, x8, #8 @@ -1015,7 +1432,7 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea ; CHECK-SD-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-SD-NEXT: umull v2.4s, v0.4h, v2.4h ; CHECK-SD-NEXT: stp q1, q2, [x9] -; CHECK-SD-NEXT: b.ne .LBB10_1 +; CHECK-SD-NEXT: b.ne .LBB11_1 ; CHECK-SD-NEXT: // %bb.2: // 
%for.end12 ; CHECK-SD-NEXT: ret ; @@ -1026,7 +1443,7 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0xfffffff8 ; CHECK-GI-NEXT: xtn v0.4h, v0.4s -; CHECK-GI-NEXT: .LBB10_1: // %vector.body +; CHECK-GI-NEXT: .LBB11_1: // %vector.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-GI-NEXT: subs x8, x8, #8 @@ -1036,7 +1453,7 @@ define void @matrix_mul_unsigned_and(i32 %N, ptr nocapture %C, ptr nocapture rea ; CHECK-GI-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-GI-NEXT: umull v2.4s, v0.4h, v2.4h ; CHECK-GI-NEXT: stp q1, q2, [x9] -; CHECK-GI-NEXT: b.ne .LBB10_1 +; CHECK-GI-NEXT: b.ne .LBB11_1 ; CHECK-GI-NEXT: // %bb.2: // %for.end12 ; CHECK-GI-NEXT: ret vector.header: @@ -1089,7 +1506,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt ; CHECK-SD-NEXT: dup v0.8h, w3 ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: and x8, x0, #0xfffffff0 -; CHECK-SD-NEXT: .LBB11_1: // %vector.body +; CHECK-SD-NEXT: .LBB12_1: // %vector.body ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-SD-NEXT: subs x8, x8, #16 @@ -1103,7 +1520,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt ; CHECK-SD-NEXT: umull v2.4s, v0.4h, v2.4h ; CHECK-SD-NEXT: stp q1, q3, [x9] ; CHECK-SD-NEXT: stp q2, q4, [x9, #32] -; CHECK-SD-NEXT: b.ne .LBB11_1 +; CHECK-SD-NEXT: b.ne .LBB12_1 ; CHECK-SD-NEXT: // %bb.2: // %for.end12 ; CHECK-SD-NEXT: ret ; @@ -1114,7 +1531,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0xfffffff0 ; CHECK-GI-NEXT: xtn v0.4h, v0.4s -; CHECK-GI-NEXT: .LBB11_1: // %vector.body +; CHECK-GI-NEXT: .LBB12_1: // %vector.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-GI-NEXT: subs x8, x8, #16 @@ -1130,7 +1547,7 @@ define void @matrix_mul_unsigned_and_double(i32 %N, ptr nocapture %C, ptr nocapt ; CHECK-GI-NEXT: umull v4.4s, v0.4h, v4.4h ; CHECK-GI-NEXT: stp q1, q3, [x9] ; CHECK-GI-NEXT: stp q2, q4, [x9, #32]! 
-; CHECK-GI-NEXT: b.ne .LBB11_1 +; CHECK-GI-NEXT: b.ne .LBB12_1 ; CHECK-GI-NEXT: // %bb.2: // %for.end12 ; CHECK-GI-NEXT: ret vector.header: @@ -1184,7 +1601,7 @@ define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture reado ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: and x8, x0, #0xfffffff8 ; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: .LBB12_1: // %vector.body +; CHECK-SD-NEXT: .LBB13_1: // %vector.body ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-SD-NEXT: subs x8, x8, #8 @@ -1196,7 +1613,7 @@ define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture reado ; CHECK-SD-NEXT: mul v1.4s, v1.4s, v0.s[0] ; CHECK-SD-NEXT: mul v2.4s, v2.4s, v0.s[0] ; CHECK-SD-NEXT: stp q1, q2, [x9] -; CHECK-SD-NEXT: b.ne .LBB12_1 +; CHECK-SD-NEXT: b.ne .LBB13_1 ; CHECK-SD-NEXT: // %bb.2: // %for.end12 ; CHECK-SD-NEXT: ret ; @@ -1206,7 +1623,7 @@ define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture reado ; CHECK-GI-NEXT: dup v0.4s, w8 ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0xfffffff8 -; CHECK-GI-NEXT: .LBB12_1: // %vector.body +; CHECK-GI-NEXT: .LBB13_1: // %vector.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-GI-NEXT: subs x8, x8, #8 @@ -1218,7 +1635,7 @@ define void @matrix_mul_signed_and(i32 %N, ptr nocapture %C, ptr nocapture reado ; CHECK-GI-NEXT: mul v1.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s ; CHECK-GI-NEXT: stp q1, q2, [x9] -; CHECK-GI-NEXT: b.ne .LBB12_1 +; CHECK-GI-NEXT: b.ne .LBB13_1 ; CHECK-GI-NEXT: // %bb.2: // %for.end12 ; CHECK-GI-NEXT: ret vector.header: @@ -1272,7 +1689,7 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur ; CHECK-SD-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-SD-NEXT: and x8, x0, #0xfffffff0 ; CHECK-SD-NEXT: fmov s0, w9 -; CHECK-SD-NEXT: .LBB13_1: // %vector.body +; CHECK-SD-NEXT: .LBB14_1: // %vector.body ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-SD-NEXT: subs x8, x8, #16 @@ -1290,7 +1707,7 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur ; CHECK-SD-NEXT: mul v2.4s, v2.4s, v0.s[0] ; CHECK-SD-NEXT: stp q1, q3, [x9] ; CHECK-SD-NEXT: stp q2, q4, [x9, #32] -; CHECK-SD-NEXT: b.ne .LBB13_1 +; CHECK-SD-NEXT: b.ne .LBB14_1 ; CHECK-SD-NEXT: // %bb.2: // %for.end12 ; CHECK-SD-NEXT: ret ; @@ -1300,7 +1717,7 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur ; CHECK-GI-NEXT: dup v0.4s, w8 ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0xfffffff0 -; CHECK-GI-NEXT: .LBB13_1: // %vector.body +; CHECK-GI-NEXT: .LBB14_1: // %vector.body ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: add x9, x2, w0, uxtw #1 ; CHECK-GI-NEXT: subs x8, x8, #16 @@ -1318,7 +1735,7 @@ define void @matrix_mul_signed_and_double(i32 %N, ptr nocapture %C, ptr nocaptur ; CHECK-GI-NEXT: mul v2.4s, v0.4s, v2.4s ; CHECK-GI-NEXT: stp q3, q1, [x9] ; CHECK-GI-NEXT: stp q4, q2, [x9, #32]! 
-; CHECK-GI-NEXT: b.ne .LBB13_1 +; CHECK-GI-NEXT: b.ne .LBB14_1 ; CHECK-GI-NEXT: // %bb.2: // %for.end12 ; CHECK-GI-NEXT: ret vector.header: @@ -1369,9 +1786,9 @@ define noundef <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %sc ; CHECK-SD-LABEL: cmplx_mul_combined_re_im: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: lsr x9, x0, #16 -; CHECK-SD-NEXT: adrp x8, .LCPI14_0 +; CHECK-SD-NEXT: adrp x8, .LCPI15_0 ; CHECK-SD-NEXT: dup v4.8h, w0 -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI14_0] +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI15_0] ; CHECK-SD-NEXT: dup v2.8h, w9 ; CHECK-SD-NEXT: sqneg v1.8h, v2.8h ; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b @@ -1386,12 +1803,12 @@ define noundef <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %sc ; CHECK-GI-LABEL: cmplx_mul_combined_re_im: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: lsr w9, w0, #16 -; CHECK-GI-NEXT: adrp x8, .LCPI14_0 +; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: rev32 v4.8h, v0.8h ; CHECK-GI-NEXT: dup v1.8h, w9 ; CHECK-GI-NEXT: fmov s3, w9 ; CHECK-GI-NEXT: sqneg v2.8h, v1.8h -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v1.16b ; CHECK-GI-NEXT: mov d2, v0.d[1] ; CHECK-GI-NEXT: dup v3.8h, w0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll index 6e5c666..0cd885e 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -222,22 +222,20 @@ define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind { define <2 x i64> @smull_zext_v2i32_v2i64(ptr %A, ptr %B) nounwind { ; CHECK-NEON-LABEL: smull_zext_v2i32_v2i64: ; CHECK-NEON: // %bb.0: -; CHECK-NEON-NEXT: ldrh w8, [x0] -; CHECK-NEON-NEXT: ldrh w9, [x0, #2] +; CHECK-NEON-NEXT: ldrh w8, [x0, #2] +; CHECK-NEON-NEXT: ldr h0, [x0] ; CHECK-NEON-NEXT: ldr d1, [x1] -; CHECK-NEON-NEXT: fmov d0, x8 -; CHECK-NEON-NEXT: mov v0.d[1], x9 +; CHECK-NEON-NEXT: mov v0.d[1], x8 ; CHECK-NEON-NEXT: xtn v0.2s, v0.2d ; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEON-NEXT: ret ; ; CHECK-SVE-LABEL: smull_zext_v2i32_v2i64: ; CHECK-SVE: // %bb.0: -; CHECK-SVE-NEXT: ldrh w8, [x0] -; CHECK-SVE-NEXT: ldrh w9, [x0, #2] +; CHECK-SVE-NEXT: ldrh w8, [x0, #2] +; CHECK-SVE-NEXT: ldr h0, [x0] ; CHECK-SVE-NEXT: ldr d1, [x1] -; CHECK-SVE-NEXT: fmov d0, x8 -; CHECK-SVE-NEXT: mov v0.d[1], x9 +; CHECK-SVE-NEXT: mov v0.d[1], x8 ; CHECK-SVE-NEXT: xtn v0.2s, v0.2d ; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-SVE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll new file mode 100644 index 0000000..a729772 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/arm64-cvt-simd-fptoi.ll @@ -0,0 +1,1943 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK-NOFPRCVT +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fprcvt,+fullfp16 | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple aarch64-unknown-unknown -global-isel -global-isel-abort=2 -mattr=+fprcvt,+fullfp16 2>&1 | FileCheck %s --check-prefixes=CHECK + +; CHECK-GI: warning: Instruction selection used fallback path for fptosi_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for fptosi_i64_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i64_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptosi_i32_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f16_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f32_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i64_f64_simd +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptoui_i32_f32_simd + +; +; FPTOI +; + +define float @test_fptosi_f16_i32_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: test_fptosi_f16_i32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptosi_f16_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %r = fptosi half %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptosi_f16_i64_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: test_fptosi_f16_i64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptosi_f16_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %r = fptosi half %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define float @test_fptosi_f64_i32_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: test_fptosi_f64_i32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptosi_f64_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %r = fptosi double %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptosi_f32_i64_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: test_fptosi_f32_i64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptosi_f32_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %r = fptosi float %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define double @test_fptosi_f64_i64_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: test_fptosi_f64_i64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptosi_f64_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = fptosi double %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + + +define float @test_fptosi_f32_i32_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: test_fptosi_f32_i32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptosi_f32_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = fptosi float %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define float @test_fptoui_f16_i32_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: test_fptoui_f16_i32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; 
CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptoui_f16_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %r = fptoui half %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptoui_f16_i64_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: test_fptoui_f16_i64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptoui_f16_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %r = fptoui half %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define float @test_fptoui_f64_i32_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: test_fptoui_f64_i32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptoui_f64_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %r = fptoui double %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + +define double @test_fptoui_f32_i64_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: test_fptoui_f32_i64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptoui_f32_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %r = fptoui float %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + +define double @test_fptoui_f64_i64_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: test_fptoui_f64_i64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptoui_f64_i64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %r = fptoui double %a to i64 + %bc = bitcast i64 %r to double + ret double %bc +} + + +define float @test_fptoui_f32_i32_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: test_fptoui_f32_i32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: test_fptoui_f32_i32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %r = fptoui float %a to i32 + %bc = bitcast i32 %r to float + ret float %bc +} + + +; +; FPTOI strictfp +; + +define float @fptosi_i32_f16_simd(half %x) { +; CHECK-NOFPRCVT-LABEL: fptosi_i32_f16_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptosi_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @fptosi_i64_f16_simd(half %x) { +; CHECK-NOFPRCVT-LABEL: fptosi_i64_f16_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptosi_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @fptosi_i64_f32_simd(float %x) { +; CHECK-NOFPRCVT-LABEL: fptosi_i64_f32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, 
s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptosi_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptosi_i32_f64_simd(double %x) { +; CHECK-NOFPRCVT-LABEL: fptosi_i32_f64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptosi_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @fptosi_i64_f64_simd(double %x) { +; CHECK-NOFPRCVT-LABEL: fptosi_i64_f64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptosi_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptosi_i32_f32_simd(float %x) { +; CHECK-NOFPRCVT-LABEL: fptosi_i32_f32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptosi_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + + + +define float @fptoui_i32_f16_simd(half %x) { +; CHECK-NOFPRCVT-LABEL: fptoui_i32_f16_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptoui_i32_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i32 %val to float + ret float %sum +} + +define double @fptoui_i64_f16_simd(half %x) { +; CHECK-NOFPRCVT-LABEL: fptoui_i64_f16_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptoui_i64_f16_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f16(half %x, metadata !"fpexcept.strict") + %sum = bitcast i64 %val to double + ret double %sum +} + +define double @fptoui_i64_f32_simd(float %x) { +; CHECK-NOFPRCVT-LABEL: fptoui_i64_f32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptoui_i64_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptoui_i32_f64_simd(double %x) { +; CHECK-NOFPRCVT-LABEL: fptoui_i32_f64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptoui_i32_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + 
%val = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +define double @fptoui_i64_f64_simd(double %x) { +; CHECK-NOFPRCVT-LABEL: fptoui_i64_f64_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptoui_i64_f64_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %val = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %x, metadata !"fpexcept.strict") + %bc = bitcast i64 %val to double + ret double %bc +} + +define float @fptoui_i32_f32_simd(float %x) { +; CHECK-NOFPRCVT-LABEL: fptoui_i32_f32_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fptoui_i32_f32_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %val = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %x, metadata !"fpexcept.strict") + %bc = bitcast i32 %val to float + ret float %bc +} + +; +; FPTOI rounding +; + + +define double @fcvtas_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtas_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtas_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtau_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtau x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtau w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 
+; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtau_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtms_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtms_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + + +define double @fcvtmu_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtmu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtmu w8, d0 +; 
CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtmu_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtps_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtps_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtpu_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtpu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; 
CHECK-NOFPRCVT-NEXT: fcvtpu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtpu_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +define double @fcvtzs_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = fptosi float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_sd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = fptosi double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_ds_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_ds_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = fptoui float %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_sd_round_simd: +; 
CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_sd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = fptoui double %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_round_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_ss_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_ss_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = fptosi float %r to i32 + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_round_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_dd_round_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_dd_round_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = fptosi double %r to i64 + %bc = bitcast i64 %i to double + ret double %bc +} + + +; +; FPTOI saturating +; + +define float @fcvtzs_sh_sat_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_sh_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_sh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f16(half %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dh_sat_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_dh_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_dh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f16(half %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzs_ds_sat_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_ds_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_ds_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f32(float %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_sat_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_sd_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_sd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f64(double %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzs_ss_sat_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_ss_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_ss_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_sat_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_dd_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; 
CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_dd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sh_sat_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_sh_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_sh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptoui.sat.i32.f16(half %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dh_sat_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_dh_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_dh_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f16(half %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_sat_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_ds_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_ds_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptoui.sat.i64.f32(float %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_sat_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_sd_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_sd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptoui.sat.i32.f64(double %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_sat_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_ss_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_ss_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %i = call i32 @llvm.fptosi.sat.i32.f32(float %a) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_sat_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_dd_sat_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_dd_sat_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %i = call i64 @llvm.fptosi.sat.i64.f64(double %a) + %bc = bitcast i64 %i to double + ret double %bc +} + +; +; FPTOI saturating with rounding +; + +define float @fcvtas_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_dh_simd: +; CHECK: 
// %bb.0: +; CHECK-NEXT: fcvtas d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtas_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtas_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtas_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtas_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtas_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtas_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtau w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtau x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.round.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtau_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtau x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtau_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_sd_simd: +; 
CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtau w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtau s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtau_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.round.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtau_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtau_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtas d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtau_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtas d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.round.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtms_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtms_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtms_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtms_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + 
ret float %bc +} + +define double @fcvtms_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtms_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtms_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtmu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtmu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.floor.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtmu_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtmu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtmu_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtmu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtmu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtmu_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.floor.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtmu_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtmu_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtms d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtmu_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtms d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.floor.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, h0 +; CHECK-NEXT: ret + %r = call half 
@llvm.ceil.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtps_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtps_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtps_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtps_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtps_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtps_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtpu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtpu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.ceil.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtpu_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtpu x8, s0 +; 
CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtpu_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtpu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtpu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtpu_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.ceil.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtpu_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtpu_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtps d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtpu_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtps d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.ceil.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptosi.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptosi.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzs_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = call i64 @llvm.fptosi.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzs_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = call i32 @llvm.fptosi.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define 
float @fcvtzs_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = call i32 @llvm.fptosi.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzs_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzs_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzs d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzs_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = call i64 @llvm.fptosi.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_sh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, h0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_sh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i32 @llvm.fptoui.sat.i32.f16(half %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dh_simd(half %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_dh_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, h0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_dh_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, h0 +; CHECK-NEXT: ret + %r = call half @llvm.trunc.f16(half %a) nounwind readnone + %i = call i64 @llvm.fptoui.sat.i64.f16(half %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define double @fcvtzu_ds_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_ds_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu x8, s0 +; CHECK-NOFPRCVT-NEXT: fmov d0, x8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_ds_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = call i64 @llvm.fptoui.sat.i64.f32(float %r) + %bc = bitcast i64 %i to double + ret double %bc +} + +define float @fcvtzu_sd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_sd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu w8, d0 +; CHECK-NOFPRCVT-NEXT: fmov s0, w8 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_sd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = call i32 @llvm.fptoui.sat.i32.f64(double %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define float @fcvtzu_ss_simd(float %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_ss_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu s0, s0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_ss_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu s0, s0 +; CHECK-NEXT: ret + %r = call float @llvm.trunc.f32(float %a) + %i = call i32 @llvm.fptoui.sat.i32.f32(float %r) + %bc = bitcast i32 %i to float + ret float %bc +} + +define double @fcvtzu_dd_simd(double %a) { +; CHECK-NOFPRCVT-LABEL: fcvtzu_dd_simd: +; CHECK-NOFPRCVT: // %bb.0: +; CHECK-NOFPRCVT-NEXT: fcvtzu d0, d0 +; CHECK-NOFPRCVT-NEXT: ret +; +; CHECK-LABEL: fcvtzu_dd_simd: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzu d0, d0 +; CHECK-NEXT: ret + %r = call double @llvm.trunc.f64(double %a) + %i = call i64 
@llvm.fptoui.sat.i64.f64(double %r) + %bc = bitcast i64 %i to double + ret double %bc +} diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index e18a5f6..d8f3708 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -980,12 +980,18 @@ define <1 x double> @test_bitcasti64tov1f64(i64 %in) { } define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { -; CHECK-LABEL: test_bitcastv8i8tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.8b, v0.8b -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv8i8tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.8b, v0.8b +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv8i8tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.8b, v0.8b +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <8 x i8> zeroinitializer, %a %1 = bitcast <8 x i8> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -993,12 +999,18 @@ define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { } define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { -; CHECK-LABEL: test_bitcastv4i16tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.4h, v0.4h -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv4i16tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.4h, v0.4h +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv4i16tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.4h, v0.4h +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <4 x i16> zeroinitializer, %a %1 = bitcast <4 x i16> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -1006,12 +1018,18 @@ define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { } define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { -; CHECK-LABEL: test_bitcastv2i32tov1f64: -; CHECK: // %bb.0: -; CHECK-NEXT: neg v0.2s, v0.2s -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_bitcastv2i32tov1f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: neg v0.2s, v0.2s +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_bitcastv2i32tov1f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: neg v0.2s, v0.2s +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %sub.i = sub <2 x i32> zeroinitializer, %a %1 = bitcast <2 x i32> %sub.i to <1 x double> %vcvt.i = fptosi <1 x double> %1 to <1 x i64> @@ -1031,8 +1049,7 @@ define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { ; CHECK-GI-NEXT: fmov x8, d0 ; CHECK-GI-NEXT: neg x8, x8 ; CHECK-GI-NEXT: fmov d0, x8 -; CHECK-GI-NEXT: fcvtzs x8, d0 -; CHECK-GI-NEXT: fmov d0, x8 +; CHECK-GI-NEXT: fcvtzs d0, d0 ; CHECK-GI-NEXT: ret %sub.i = sub <1 x i64> zeroinitializer, %a %1 = bitcast <1 x i64> %sub.i to <1 x double> diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll index 627d31f..1e0cfa0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt.ll @@ -359,11 +359,16 @@ define <2 x i64> @fcvtzs_2d(<2 x double> %A) nounwind { ; FIXME: Generate "fcvtzs d0, d0"? 
define <1 x i64> @fcvtzs_1d(<1 x double> %A) nounwind { -; CHECK-LABEL: fcvtzs_1d: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzs_1d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzs x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzs_1d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtzs d0, d0 +; CHECK-GI-NEXT: ret %tmp3 = fptosi <1 x double> %A to <1 x i64> ret <1 x i64> %tmp3 } @@ -438,11 +443,16 @@ define <2 x i64> @fcvtzu_2d(<2 x double> %A) nounwind { ; FIXME: Generate "fcvtzu d0, d0"? define <1 x i64> @fcvtzu_1d(<1 x double> %A) nounwind { -; CHECK-LABEL: fcvtzu_1d: -; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu x8, d0 -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcvtzu_1d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtzu x8, d0 +; CHECK-SD-NEXT: fmov d0, x8 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcvtzu_1d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtzu d0, d0 +; CHECK-GI-NEXT: ret %tmp3 = fptoui <1 x double> %A to <1 x i64> ret <1 x i64> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll b/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll new file mode 100644 index 0000000..cf52934 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dup-ext-load-combine.ll @@ -0,0 +1,178 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s + +; Test optimization of DUP with extended narrow loads +; This should avoid GPR->SIMD transfers by loading directly into vector registers + +define <4 x i16> @test_dup_zextload_i8_v4i16(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v4i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i16 + %vec = insertelement <4 x i16> poison, i16 %ext, i32 0 + %dup = shufflevector <4 x i16> %vec, <4 x i16> poison, <4 x i32> zeroinitializer + ret <4 x i16> %dup +} + +define <8 x i16> @test_dup_zextload_i8_v8i16(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i16 + %vec = insertelement <8 x i16> poison, i16 %ext, i32 0 + %dup = shufflevector <8 x i16> %vec, <8 x i16> poison, <8 x i32> zeroinitializer + ret <8 x i16> %dup +} + +define <2 x i32> @test_dup_zextload_i8_v2i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <2 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer + ret <2 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i8_v4i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i8_v4i32_offset(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v4i32_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0, #4] +; CHECK-NEXT: dup v0.4s, v0.s[0] 
+; CHECK-NEXT: ret + %addr = getelementptr inbounds i8, ptr %p, i64 4 + %load = load i8, ptr %addr, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i8_v4i32_reg_offset(ptr %p, i64 %offset) { +; CHECK-LABEL: test_dup_zextload_i8_v4i32_reg_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0, x1] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i8, ptr %p, i64 %offset + %load = load i8, ptr %addr, align 1 + %ext = zext i8 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <2 x i64> @test_dup_zextload_i8_v2i64(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i8_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %load = load i8, ptr %p, align 1 + %ext = zext i8 %load to i64 + %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 + %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %dup +} + +define <2 x i32> @test_dup_zextload_i16_v2i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v2i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: ret + %load = load i16, ptr %p, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <2 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <2 x i32> %vec, <2 x i32> poison, <2 x i32> zeroinitializer + ret <2 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i16_v4i32(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %load = load i16, ptr %p, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i16_v4i32_offset(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v4i32_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0, #8] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i16, ptr %p, i64 4 + %load = load i16, ptr %addr, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <4 x i32> @test_dup_zextload_i16_v4i32_reg_offset(ptr %p, i64 %offset) { +; CHECK-LABEL: test_dup_zextload_i16_v4i32_reg_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0, x1, lsl #1] +; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: ret + %addr = getelementptr inbounds i16, ptr %p, i64 %offset + %load = load i16, ptr %addr, align 1 + %ext = zext i16 %load to i32 + %vec = insertelement <4 x i32> poison, i32 %ext, i32 0 + %dup = shufflevector <4 x i32> %vec, <4 x i32> poison, <4 x i32> zeroinitializer + ret <4 x i32> %dup +} + +define <2 x i64> @test_dup_zextload_i16_v2i64(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i16_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %load = load i16, ptr %p, align 1 + %ext = zext i16 %load to i64 + %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 + %dup = shufflevector <2 x i64> 
%vec, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %dup +} + +define <2 x i64> @test_dup_zextload_i32_v2i64(ptr %p) { +; CHECK-LABEL: test_dup_zextload_i32_v2i64: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: dup v0.2d, v0.d[0] +; CHECK-NEXT: ret + %load = load i32, ptr %p, align 1 + %ext = zext i32 %load to i64 + %vec = insertelement <2 x i64> poison, i64 %ext, i32 0 + %dup = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer + ret <2 x i64> %dup +} diff --git a/llvm/test/CodeGen/AArch64/dup.ll b/llvm/test/CodeGen/AArch64/dup.ll index 079ff10..670574f2 100644 --- a/llvm/test/CodeGen/AArch64/dup.ll +++ b/llvm/test/CodeGen/AArch64/dup.ll @@ -32,8 +32,8 @@ entry: define <2 x i8> @loaddup_v2i8(ptr %p) { ; CHECK-LABEL: loaddup_v2i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: dup v0.2s, w8 +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: dup v0.2s, v0.s[0] ; CHECK-NEXT: ret entry: %a = load i8, ptr %p @@ -189,8 +189,8 @@ entry: define <4 x i8> @loaddup_v4i8(ptr %p) { ; CHECK-SD-LABEL: loaddup_v4i8: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldrb w8, [x0] -; CHECK-SD-NEXT: dup v0.4h, w8 +; CHECK-SD-NEXT: ldr b0, [x0] +; CHECK-SD-NEXT: dup v0.4h, v0.h[0] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: loaddup_v4i8: @@ -444,8 +444,8 @@ entry: define <2 x i16> @loaddup_v2i16(ptr %p) { ; CHECK-SD-LABEL: loaddup_v2i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldrh w8, [x0] -; CHECK-SD-NEXT: dup v0.2s, w8 +; CHECK-SD-NEXT: ldr h0, [x0] +; CHECK-SD-NEXT: dup v0.2s, v0.s[0] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: loaddup_v2i16: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index c741129..b963acd 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -31,8 +31,7 @@ define <1 x i32> @test_signed_v1f32_v1i32(<1 x float> %f) { ; ; CHECK-GI-LABEL: test_signed_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzs w8, s0 -; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fcvtzs s0, s0 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -1162,18 +1161,24 @@ declare <7 x i32> @llvm.fptosi.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptosi.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_signed_v1f16_v1i32(<1 x half> %f) { -; CHECK-CVT-LABEL: test_signed_v1f16_v1i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs w8, s0 -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret +; CHECK-SD-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-SD-CVT: // %bb.0: +; CHECK-SD-CVT-NEXT: fcvt s0, h0 +; CHECK-SD-CVT-NEXT: fcvtzs w8, s0 +; CHECK-SD-CVT-NEXT: fmov s0, w8 +; CHECK-SD-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_signed_v1f16_v1i32: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzs s0, s0 +; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptosi.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index efe0a1b..5a66b68 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -31,8 +31,7 @@ define <1 x i32> @test_unsigned_v1f32_v1i32(<1 x float> %f) { ; ; 
CHECK-GI-LABEL: test_unsigned_v1f32_v1i32: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcvtzu w8, s0 -; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fcvtzu s0, s0 ; CHECK-GI-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f32.v1i32(<1 x float> %f) ret <1 x i32> %x @@ -993,18 +992,24 @@ declare <7 x i32> @llvm.fptoui.sat.v7f16.v7i32 (<7 x half>) declare <8 x i32> @llvm.fptoui.sat.v8f16.v8i32 (<8 x half>) define <1 x i32> @test_unsigned_v1f16_v1i32(<1 x half> %f) { -; CHECK-CVT-LABEL: test_unsigned_v1f16_v1i32: -; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu w8, s0 -; CHECK-CVT-NEXT: fmov s0, w8 -; CHECK-CVT-NEXT: ret +; CHECK-SD-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-SD-CVT: // %bb.0: +; CHECK-SD-CVT-NEXT: fcvt s0, h0 +; CHECK-SD-CVT-NEXT: fcvtzu w8, s0 +; CHECK-SD-CVT-NEXT: fmov s0, w8 +; CHECK-SD-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v1f16_v1i32: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzu w8, h0 ; CHECK-FP16-NEXT: fmov s0, w8 ; CHECK-FP16-NEXT: ret +; +; CHECK-GI-CVT-LABEL: test_unsigned_v1f16_v1i32: +; CHECK-GI-CVT: // %bb.0: +; CHECK-GI-CVT-NEXT: fcvt s0, h0 +; CHECK-GI-CVT-NEXT: fcvtzu s0, s0 +; CHECK-GI-CVT-NEXT: ret %x = call <1 x i32> @llvm.fptoui.sat.v1f16.v1i32(<1 x half> %f) ret <1 x i32> %x } diff --git a/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll b/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll new file mode 100644 index 0000000..85991fb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ldst-prepost-uses.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -O3 -mtriple=aarch64 | FileCheck %s + +; From #164775, this generates a pre-index load feeding a post-index store, that +; was checking the wrong uses for post-inc. It seems quite delicate for it to +; generate this combination at the wrong point to hit the same issue. + +@g_260 = dso_local global i16 0 +@g_480 = dso_local global i16 0 + +define i32 @func_1(ptr %l_3253) { +; CHECK-LABEL: func_1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: .cfi_def_cfa_offset 128 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: mov w10, #96 // =0x60 +; CHECK-NEXT: strb wzr, [x9] +; CHECK-NEXT: mov w9, #111 // =0x6f +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: str wzr, [x9] +; CHECK-NEXT: mov w9, #80 // =0x50 +; CHECK-NEXT: adrp x1, .L_MergedGlobals +; CHECK-NEXT: add x1, x1, :lo12:.L_MergedGlobals +; CHECK-NEXT: strh wzr, [x8] +; CHECK-NEXT: str q0, [x9] +; CHECK-NEXT: mov w9, #48 // =0x30 +; CHECK-NEXT: str q0, [x9] +; CHECK-NEXT: mov w9, #32 // =0x20 +; CHECK-NEXT: str q0, [x10] +; CHECK-NEXT: mov w10, #64 // =0x40 +; CHECK-NEXT: str q0, [x9] +; CHECK-NEXT: mov w9, #16 // =0x10 +; CHECK-NEXT: str q0, [x10] +; CHECK-NEXT: str q0, [x9] +; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: adrp x8, .L_MergedGlobals +; CHECK-NEXT: strb wzr, [x0, #8] +; CHECK-NEXT: strb wzr, [x0, #12] +; CHECK-NEXT: strb wzr, [x0, #16] +; CHECK-NEXT: strb wzr, [x0, #20] +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldrh wzr, [x8, :lo12:.L_MergedGlobals] +; CHECK-NEXT: ldrh w8, [x1, #4]! 
+; CHECK-NEXT: sub w8, w8, #1 +; CHECK-NEXT: strh w8, [x1] +; CHECK-NEXT: add sp, sp, #128 +; CHECK-NEXT: b use +entry: + %l_32531.sroa.3 = alloca [3 x i8], align 4 + %l_32531.sroa.4 = alloca [115 x i8], align 4 + call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.3) + call void @llvm.lifetime.start.p0(ptr %l_32531.sroa.4) + call void @llvm.memset.p0.i64(ptr null, i8 0, i64 3, i1 false) + call void @llvm.memset.p0.i64(ptr null, i8 0, i64 115, i1 false) + %0 = getelementptr inbounds i8, ptr %l_3253, i64 8 + store i8 0, ptr %0, align 4 + %1 = getelementptr inbounds i8, ptr %l_3253, i64 12 + store i8 0, ptr %1, align 4 + %2 = getelementptr inbounds i8, ptr %l_3253, i64 16 + store i8 0, ptr %2, align 4 + %3 = getelementptr inbounds i8, ptr %l_3253, i64 20 + store i8 0, ptr %3, align 4 + %4 = load volatile i16, ptr @g_260, align 4 + %5 = load i16, ptr @g_480, align 4 + %dec.i.i = add i16 %5, -1 + store i16 %dec.i.i, ptr @g_480, align 4 + %call1 = tail call i32 @use(i32 0, ptr @g_480) + ret i32 %call1 +} + +declare i32 @use(i32, ptr) diff --git a/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll index 9193025..628506b 100644 --- a/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/load-zext-bitcast.ll @@ -84,8 +84,7 @@ entry: define double @load_u64_from_u32_off1(ptr %n){ ; CHECK-LABEL: load_u64_from_u32_off1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldur w8, [x0, #1] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldur s0, [x0, #1] ; CHECK-NEXT: ret entry: %p = getelementptr i8, ptr %n, i64 1 @@ -98,8 +97,7 @@ entry: define double @load_u64_from_u16_off1(ptr %n){ ; CHECK-LABEL: load_u64_from_u16_off1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldurh w8, [x0, #1] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldur h0, [x0, #1] ; CHECK-NEXT: ret entry: %p = getelementptr i8, ptr %n, i64 1 @@ -112,8 +110,7 @@ entry: define double @load_u64_from_u8_off1(ptr %n){ ; CHECK-LABEL: load_u64_from_u8_off1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldr b0, [x0, #1] ; CHECK-NEXT: ret entry: %p = getelementptr i8, ptr %n, i64 1 @@ -126,8 +123,7 @@ entry: define float @load_u32_from_u16_off1(ptr %n){ ; CHECK-LABEL: load_u32_from_u16_off1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldurh w8, [x0, #1] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldur h0, [x0, #1] ; CHECK-NEXT: ret entry: %p = getelementptr i8, ptr %n, i64 1 @@ -140,8 +136,7 @@ entry: define float @load_u32_from_u8_off1(ptr %n){ ; CHECK-LABEL: load_u32_from_u8_off1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldr b0, [x0, #1] ; CHECK-NEXT: ret entry: %p = getelementptr i8, ptr %n, i64 1 @@ -154,8 +149,7 @@ entry: define half @load_u16_from_u8_off1(ptr %n){ ; CHECK-LABEL: load_u16_from_u8_off1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldr b0, [x0, #1] ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0 ; CHECK-NEXT: ret entry: @@ -171,8 +165,7 @@ entry: define double @load_u64_from_u32_off2(ptr %n){ ; CHECK-LABEL: load_u64_from_u32_off2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldur w8, [x0, #2] -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: ldur s0, [x0, #2] ; CHECK-NEXT: ret entry: %p = getelementptr i8, ptr %n, i64 2 @@ -185,8 +178,7 @@ entry: define double @load_u64_from_u16_off2(ptr %n){ ; CHECK-LABEL: load_u64_from_u16_off2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrh w8, [x0, #2] -; 
+; CHECK-NEXT: ldr h0, [x0, #2]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 2
@@ -199,8 +191,7 @@ entry:
define double @load_u64_from_u8_off2(ptr %n){
; CHECK-LABEL: load_u64_from_u8_off2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0, #2]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0, #2]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 2
@@ -226,7 +217,7 @@ entry:
define float @load_u32_from_u8_off2(ptr %n){
; CHECK-LABEL: load_u32_from_u8_off2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #1]
+; CHECK-NEXT: ldr b0, [x0, #2]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 2
@@ -239,7 +230,7 @@ entry:
define half @load_u16_from_u8_off2(ptr %n){
; CHECK-LABEL: load_u16_from_u8_off2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #1]
+; CHECK-NEXT: ldr b0, [x0, #2]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
@@ -255,8 +246,7 @@ entry:
define double @load_u64_from_u32_off255(ptr %n){
; CHECK-LABEL: load_u64_from_u32_off255:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldur w8, [x0, #255]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldur s0, [x0, #255]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 255
@@ -269,8 +259,7 @@ entry:
define double @load_u64_from_u16_off255(ptr %n){
; CHECK-LABEL: load_u64_from_u16_off255:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldurh w8, [x0, #255]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldur h0, [x0, #255]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 255
@@ -283,8 +272,7 @@ entry:
define double @load_u64_from_u8_off255(ptr %n){
; CHECK-LABEL: load_u64_from_u8_off255:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0, #255]
-; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr b0, [x0, #255]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 255
@@ -297,8 +285,7 @@ entry:
define float @load_u32_from_u16_off255(ptr %n){
; CHECK-LABEL: load_u32_from_u16_off255:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldurh w8, [x0, #255]
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldur h0, [x0, #255]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 255
@@ -311,8 +298,7 @@ entry:
define float @load_u32_from_u8_off255(ptr %n){
; CHECK-LABEL: load_u32_from_u8_off255:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0, #255]
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldr b0, [x0, #255]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 255
@@ -325,8 +311,7 @@ entry:
define half @load_u16_from_u8_off255(ptr %n){
; CHECK-LABEL: load_u16_from_u8_off255:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldrb w8, [x0, #255]
-; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldr b0, [x0, #255]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
@@ -354,7 +339,7 @@ entry:
define double @load_u64_from_u16_off256(ptr %n){
; CHECK-LABEL: load_u64_from_u16_off256:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0, #128]
+; CHECK-NEXT: ldr h0, [x0, #256]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 256
@@ -367,7 +352,7 @@ entry:
define double @load_u64_from_u8_off256(ptr %n){
; CHECK-LABEL: load_u64_from_u8_off256:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #64]
+; CHECK-NEXT: ldr b0, [x0, #256]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 256
@@ -393,7 +378,7 @@ entry:
define float @load_u32_from_u8_off256(ptr %n){
; CHECK-LABEL: load_u32_from_u8_off256:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #128]
+; CHECK-NEXT: ldr b0, [x0, #256]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 256
@@ -406,7 +391,7 @@ entry:
define half @load_u16_from_u8_off256(ptr %n){
; CHECK-LABEL: load_u16_from_u8_off256:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #128]
+; CHECK-NEXT: ldr b0, [x0, #256]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
@@ -435,8 +420,7 @@ entry:
define double @load_u64_from_u16_offn(ptr %n){
; CHECK-LABEL: load_u64_from_u16_offn:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #8190 // =0x1ffe
-; CHECK-NEXT: ldr h0, [x0, x8]
+; CHECK-NEXT: ldr h0, [x0, #8190]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 8190
@@ -503,8 +487,8 @@ entry:
define double @load_u64_from_u32_offnp1(ptr %n){
; CHECK-LABEL: load_u64_from_u32_offnp1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add x8, x0, #4, lsl #12 // =16384
-; CHECK-NEXT: ldr s0, [x8]
+; CHECK-NEXT: mov w8, #16384 // =0x4000
+; CHECK-NEXT: ldr s0, [x0, x8]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 16384
@@ -517,7 +501,8 @@ entry:
define double @load_u64_from_u16_offnp1(ptr %n){
; CHECK-LABEL: load_u64_from_u16_offnp1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr h0, [x0, #4096]
+; CHECK-NEXT: mov w8, #8192 // =0x2000
+; CHECK-NEXT: ldr h0, [x0, x8]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 8192
@@ -530,7 +515,8 @@ entry:
define double @load_u64_from_u8_offnp1(ptr %n){
; CHECK-LABEL: load_u64_from_u8_offnp1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #1024]
+; CHECK-NEXT: mov w8, #4096 // =0x1000
+; CHECK-NEXT: ldr b0, [x0, x8]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 4096
@@ -543,8 +529,8 @@ entry:
define float @load_u32_from_u16_offnp1(ptr %n){
; CHECK-LABEL: load_u32_from_u16_offnp1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: add x8, x0, #2, lsl #12 // =8192
-; CHECK-NEXT: ldr h0, [x8]
+; CHECK-NEXT: mov w8, #8192 // =0x2000
+; CHECK-NEXT: ldr h0, [x0, x8]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 8192
@@ -557,7 +543,8 @@ entry:
define float @load_u32_from_u8_offnp1(ptr %n){
; CHECK-LABEL: load_u32_from_u8_offnp1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #2048]
+; CHECK-NEXT: mov w8, #4096 // =0x1000
+; CHECK-NEXT: ldr b0, [x0, x8]
; CHECK-NEXT: ret
entry:
%p = getelementptr i8, ptr %n, i64 4096
@@ -570,7 +557,8 @@ entry:
define half @load_u16_from_u8_offnp1(ptr %n){
; CHECK-LABEL: load_u16_from_u8_offnp1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [x0, #2048]
+; CHECK-NEXT: mov w8, #4096 // =0x1000
+; CHECK-NEXT: ldr b0, [x0, x8]
; CHECK-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
index 06c53d8..b17b48c 100644
--- a/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
+++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-rdsvl.ll
@@ -86,4 +86,111 @@ define i64 @sme_cntsd_mul() {
ret i64 %res
}
-declare i64 @llvm.aarch64.sme.cntsd()
+define i64 @sme_cntsb_mul_pos() {
+; CHECK-LABEL: sme_cntsb_mul_pos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #24
+; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 3
+ %res = mul nuw nsw i64 %shl, 96
+ ret i64 %res
+}
+
+define i64 @sme_cntsh_mul_pos() {
+; CHECK-LABEL: sme_cntsh_mul_pos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #3
+; CHECK-NEXT: lsr x0, x8, #1
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 2
+ %res = mul nuw nsw i64 %shl, 3
+ ret i64 %res
+}
+
+define i64 @sme_cntsw_mul_pos() {
+; CHECK-LABEL: sme_cntsw_mul_pos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #31
+; CHECK-NEXT: lsr x0, x8, #1
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 1
+ %res = mul nuw nsw i64 %shl, 62
+ ret i64 %res
+}
+
+define i64 @sme_cntsd_mul_pos() {
+; CHECK-LABEL: sme_cntsd_mul_pos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #31
+; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %res = mul nuw nsw i64 %v, 992
+ ret i64 %res
+}
+
+define i64 @sme_cntsb_mul_neg() {
+; CHECK-LABEL: sme_cntsb_mul_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #-24
+; CHECK-NEXT: lsl x0, x8, #2
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 3
+ %res = mul nuw nsw i64 %shl, -96
+ ret i64 %res
+}
+
+define i64 @sme_cntsh_mul_neg() {
+; CHECK-LABEL: sme_cntsh_mul_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #-3
+; CHECK-NEXT: lsr x0, x8, #1
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 2
+ %res = mul nuw nsw i64 %shl, -3
+ ret i64 %res
+}
+
+define i64 @sme_cntsw_mul_neg() {
+; CHECK-LABEL: sme_cntsw_mul_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #-31
+; CHECK-NEXT: lsl x0, x8, #3
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %shl = shl nuw nsw i64 %v, 1
+ %res = mul nuw nsw i64 %shl, -992
+ ret i64 %res
+}
+
+define i64 @sme_cntsd_mul_neg() {
+; CHECK-LABEL: sme_cntsd_mul_neg:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #-3
+; CHECK-NEXT: lsr x0, x8, #3
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %res = mul nuw nsw i64 %v, -3
+ ret i64 %res
+}
+
+; Negative test for optimization failure
+define i64 @sme_cntsd_mul_fail() {
+; CHECK-LABEL: sme_cntsd_mul_fail:
+; CHECK: // %bb.0:
+; CHECK-NEXT: rdsvl x8, #1
+; CHECK-NEXT: mov w9, #993 // =0x3e1
+; CHECK-NEXT: lsr x8, x8, #3
+; CHECK-NEXT: mul x0, x8, x9
+; CHECK-NEXT: ret
+ %v = call i64 @llvm.aarch64.sme.cntsd()
+ %res = mul nuw nsw i64 %v, 993
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
index b8d6c88..3f35cb5 100644
--- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
+++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll
@@ -829,7 +829,7 @@ define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personal
; CHECK-SDAG-NEXT: bl __arm_sme_restore
; CHECK-SDAG-NEXT: b .LBB5_1
entry:
- invoke void @agnostic_za_call()
+ invoke void @agnostic_za_call() "aarch64_za_state_agnostic"
to label %exit unwind label %catch
catch:
