diff options
Diffstat (limited to 'llvm/test/CodeGen/AArch64/arm64-vmul.ll')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-vmul.ll | 345 |
1 files changed, 195 insertions, 150 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll index e6df9f2..712452c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -2,44 +2,35 @@ ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64 +; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind { ; CHECK-LABEL: smull8h: @@ -1730,14 +1721,23 @@ define <2 x i64> @sqdmlal2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nou } define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: sqdmlal_lane_1s: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlal s2, h1, v0.h[1] -; CHECK-NEXT: fmov w0, s2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal_lane_1s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s2, w1 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: sqdmlal s1, h2, v0.h[1] +; CHECK-SD-NEXT: fmov w0, s1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal_lane_1s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: fmov s2, w0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: sqdmlal s2, h1, v0.h[1] +; CHECK-GI-NEXT: fmov w0, s2 +; CHECK-GI-NEXT: ret %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) @@ -1748,14 +1748,23 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32) define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { -; CHECK-LABEL: sqdmlsl_lane_1s: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlsl s2, h1, v0.h[1] -; CHECK-NEXT: fmov w0, s2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl_lane_1s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s2, w1 +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: sqdmlsl s1, h2, v0.h[1] +; CHECK-SD-NEXT: fmov w0, s1 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl_lane_1s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: fmov s2, w0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: sqdmlsl s2, h1, v0.h[1] +; CHECK-GI-NEXT: fmov w0, s2 +; CHECK-GI-NEXT: ret %lhs = insertelement <4 x i16> undef, i16 %B, i32 0 %rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs) @@ -1766,24 +1775,14 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind { declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32) define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-SD-LABEL: sqadd_lane1_sqdmull4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h -; CHECK-SD-NEXT: mov w8, v0.s[1] -; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: sqadd s0, s0, s1 -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sqadd_lane1_sqdmull4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h -; CHECK-GI-NEXT: fmov s1, w0 -; CHECK-GI-NEXT: mov s0, v0.s[1] -; CHECK-GI-NEXT: sqadd s0, s1, s0 -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sqadd_lane1_sqdmull4s: +; CHECK: // %bb.0: +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: sqadd s0, s1, s0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C) %prod = extractelement <4 x i32> %prod.vec, i32 1 %res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod) @@ -1791,24 +1790,14 @@ define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { } define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { -; CHECK-SD-LABEL: sqsub_lane1_sqdmull4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h -; CHECK-SD-NEXT: mov w8, v0.s[1] -; CHECK-SD-NEXT: fmov s0, w0 -; CHECK-SD-NEXT: fmov s1, w8 -; CHECK-SD-NEXT: sqsub s0, s0, s1 -; CHECK-SD-NEXT: fmov w0, s0 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: sqsub_lane1_sqdmull4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h -; CHECK-GI-NEXT: fmov s1, w0 -; CHECK-GI-NEXT: mov s0, v0.s[1] -; CHECK-GI-NEXT: sqsub s0, s1, s0 -; CHECK-GI-NEXT: fmov w0, s0 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: sqsub_lane1_sqdmull4s: +; CHECK: // %bb.0: +; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: sqsub s0, s1, s0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C) %prod = extractelement <4 x i32> %prod.vec, i32 1 %res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod) @@ -1818,11 +1807,11 @@ define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind { define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ; CHECK-LABEL: sqdmlal_lane_1d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s1, w1 +; CHECK-NEXT: fmov d2, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlal d1, s2, v0.s[1] -; CHECK-NEXT: fmov x0, d1 +; CHECK-NEXT: sqdmlal d2, s1, v0.s[1] +; CHECK-NEXT: fmov x0, d2 ; CHECK-NEXT: ret %rhs = extractelement <2 x i32> %C, i32 1 %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) @@ -1835,11 +1824,11 @@ declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64) define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ; CHECK-LABEL: sqdmlsl_lane_1d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: fmov s1, w1 +; CHECK-NEXT: fmov d2, x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlsl d1, s2, v0.s[1] -; CHECK-NEXT: fmov x0, d1 +; CHECK-NEXT: sqdmlsl d2, s1, v0.s[1] +; CHECK-NEXT: fmov x0, d2 ; CHECK-NEXT: ret %rhs = extractelement <2 x i32> %C, i32 1 %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) @@ -2895,11 +2884,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) { } define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) { -; CHECK-LABEL: pmull_from_extract_dup_high: -; CHECK: // %bb.0: -; CHECK-NEXT: dup v1.16b, w0 -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_dup_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: dup v1.16b, w0 +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_dup_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: dup v1.8b, w0 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0 %rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -2924,12 +2920,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs) } define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) { -; CHECK-LABEL: pmull_from_extract_duplane_high: -; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 -; CHECK-NEXT: dup v1.16b, v1.b[0] -; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: pmull_from_extract_duplane_high: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-SD-NEXT: dup v1.16b, v1.b[0] +; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: pmull_from_extract_duplane_high: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: dup v1.8b, v1.b[0] +; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b +; CHECK-GI-NEXT: ret %lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0> @@ -3183,14 +3187,23 @@ define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind } define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind { -; CHECK-LABEL: sqdmlal_s: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqdmlal s2, h0, v1.h[0] -; CHECK-NEXT: fmov w0, s2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlal_s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w2 +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s2, w1 +; CHECK-SD-NEXT: sqdmlal s0, h1, v2.h[0] +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlal_s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: fmov s2, w2 +; CHECK-GI-NEXT: sqdmlal s2, h0, v1.h[0] +; CHECK-GI-NEXT: fmov w0, s2 +; CHECK-GI-NEXT: ret %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0 %tmp2 = insertelement <4 x i16> undef, i16 %B, i64 0 %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -3202,11 +3215,11 @@ define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind { define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind { ; CHECK-LABEL: sqdmlal_d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x2 +; CHECK-NEXT: fmov s0, w1 ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: fmov s2, w1 -; CHECK-NEXT: sqdmlal d0, s1, s2 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: fmov d2, x2 +; CHECK-NEXT: sqdmlal d2, s1, s0 +; CHECK-NEXT: fmov x0, d2 ; CHECK-NEXT: ret %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) %tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4) @@ -3214,14 +3227,23 @@ define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind { } define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind { -; CHECK-LABEL: sqdmlsl_s: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqdmlsl s2, h0, v1.h[0] -; CHECK-NEXT: fmov w0, s2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: sqdmlsl_s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w2 +; CHECK-SD-NEXT: fmov s1, w0 +; CHECK-SD-NEXT: fmov s2, w1 +; CHECK-SD-NEXT: sqdmlsl s0, h1, v2.h[0] +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: sqdmlsl_s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fmov s1, w1 +; CHECK-GI-NEXT: fmov s2, w2 +; CHECK-GI-NEXT: sqdmlsl s2, h0, v1.h[0] +; CHECK-GI-NEXT: fmov w0, s2 +; CHECK-GI-NEXT: ret %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0 %tmp2 = insertelement <4 x i16> undef, i16 %B, i64 0 %tmp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -3233,11 +3255,11 @@ define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind { define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { ; CHECK-LABEL: sqdmlsl_d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x2 +; CHECK-NEXT: fmov s0, w1 ; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: fmov s2, w1 -; CHECK-NEXT: sqdmlsl d0, s1, s2 -; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: fmov d2, x2 +; CHECK-NEXT: sqdmlsl d2, s1, s0 +; CHECK-NEXT: fmov x0, d2 ; CHECK-NEXT: ret %tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B) %tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4) @@ -3245,21 +3267,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind { } define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) ret <16 x i8> %val } define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { -; CHECK-LABEL: test_pmull_high_64: -; CHECK: // %bb.0: -; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_pmull_high_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_pmull_high_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: mov d0, v0.d[1] +; CHECK-GI-NEXT: mov d1, v1.d[1] +; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: ret %l_hi = extractelement <2 x i64> %l, i32 1 %r_hi = extractelement <2 x i64> %r, i32 1 %val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi) @@ -3267,13 +3303,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind { } define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind { -; CHECK-LABEL: test_commutable_pmull_64: -; CHECK: // %bb.0: -; CHECK-NEXT: fmov d0, x1 -; CHECK-NEXT: fmov d1, x0 -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d -; CHECK-NEXT: add v0.16b, v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_commutable_pmull_64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov d0, x1 +; CHECK-SD-NEXT: fmov d1, x0 +; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_commutable_pmull_64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov d0, x0 +; CHECK-GI-NEXT: fmov d1, x1 +; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d +; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b +; CHECK-GI-NEXT: ret %1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r) %2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l) %3 = add <16 x i8> %1, %2 |
