about | summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/AArch64/arm64-vmul.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64/arm64-vmul.ll')
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-vmul.ll 345
1 file changed, 195 insertions, 150 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
index e6df9f2..712452c 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -2,44 +2,35 @@
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-elf -mattr=+aes -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for pmull8h
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for commutable_pmull8h
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_1s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_low
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_dup_high
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_low
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmull_from_extract_duplane_high
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_commutable_pmull_64
+; CHECK-GI: warning: Instruction selection used fallback path for sqdmulh_1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_2d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_commuted_neg_2d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_4s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_indexed_2d_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_4s_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmla_indexed_scalar_2d_strict
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmulh_lane_1s
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_lane_1d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_lane_1d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v4f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for scalar_fmls_from_extract_v2f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f32_1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v4f32_1
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fmls_with_fneg_before_extract_v2f64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlal_d
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for sqdmlsl_d
define <8 x i16> @smull8h(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: smull8h:
@@ -1730,14 +1721,23 @@ define <2 x i64> @sqdmlal2_lane_2d(<4 x i32> %A, <4 x i32> %B, <2 x i64> %C) nou
}
define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
-; CHECK-LABEL: sqdmlal_lane_1s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s1, w1
-; CHECK-NEXT: fmov s2, w0
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: sqdmlal s2, h1, v0.h[1]
-; CHECK-NEXT: fmov w0, s2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sqdmlal_lane_1s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s2, w1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: sqdmlal s1, h2, v0.h[1]
+; CHECK-SD-NEXT: fmov w0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sqdmlal_lane_1s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: fmov s2, w0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: sqdmlal s2, h1, v0.h[1]
+; CHECK-GI-NEXT: fmov w0, s2
+; CHECK-GI-NEXT: ret
%lhs = insertelement <4 x i16> undef, i16 %B, i32 0
%rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
@@ -1748,14 +1748,23 @@ define i32 @sqdmlal_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
declare i32 @llvm.aarch64.neon.sqadd.i32(i32, i32)
define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
-; CHECK-LABEL: sqdmlsl_lane_1s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s1, w1
-; CHECK-NEXT: fmov s2, w0
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: sqdmlsl s2, h1, v0.h[1]
-; CHECK-NEXT: fmov w0, s2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sqdmlsl_lane_1s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s2, w1
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: sqdmlsl s1, h2, v0.h[1]
+; CHECK-SD-NEXT: fmov w0, s1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sqdmlsl_lane_1s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: fmov s2, w0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: sqdmlsl s2, h1, v0.h[1]
+; CHECK-GI-NEXT: fmov w0, s2
+; CHECK-GI-NEXT: ret
%lhs = insertelement <4 x i16> undef, i16 %B, i32 0
%rhs = shufflevector <4 x i16> %C, <4 x i16> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
%prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %lhs, <4 x i16> %rhs)
@@ -1766,24 +1775,14 @@ define i32 @sqdmlsl_lane_1s(i32 %A, i16 %B, <4 x i16> %C) nounwind {
declare i32 @llvm.aarch64.neon.sqsub.i32(i32, i32)
define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
-; CHECK-SD-LABEL: sqadd_lane1_sqdmull4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT: mov w8, v0.s[1]
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: sqadd s0, s0, s1
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqadd_lane1_sqdmull4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: mov s0, v0.s[1]
-; CHECK-GI-NEXT: sqadd s0, s1, s0
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqadd_lane1_sqdmull4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: sqadd s0, s1, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
%prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
%prod = extractelement <4 x i32> %prod.vec, i32 1
%res = call i32 @llvm.aarch64.neon.sqadd.i32(i32 %A, i32 %prod)
@@ -1791,24 +1790,14 @@ define i32 @sqadd_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
}
define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
-; CHECK-SD-LABEL: sqsub_lane1_sqdmull4s:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-SD-NEXT: mov w8, v0.s[1]
-; CHECK-SD-NEXT: fmov s0, w0
-; CHECK-SD-NEXT: fmov s1, w8
-; CHECK-SD-NEXT: sqsub s0, s0, s1
-; CHECK-SD-NEXT: fmov w0, s0
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: sqsub_lane1_sqdmull4s:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sqdmull v0.4s, v0.4h, v1.4h
-; CHECK-GI-NEXT: fmov s1, w0
-; CHECK-GI-NEXT: mov s0, v0.s[1]
-; CHECK-GI-NEXT: sqsub s0, s1, s0
-; CHECK-GI-NEXT: fmov w0, s0
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: sqsub_lane1_sqdmull4s:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sqdmull v0.4s, v0.4h, v1.4h
+; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: sqsub s0, s1, s0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
%prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C)
%prod = extractelement <4 x i32> %prod.vec, i32 1
%res = call i32 @llvm.aarch64.neon.sqsub.i32(i32 %A, i32 %prod)
@@ -1818,11 +1807,11 @@ define i32 @sqsub_lane1_sqdmull4s(i32 %A, <4 x i16> %B, <4 x i16> %C) nounwind {
define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
; CHECK-LABEL: sqdmlal_lane_1d:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: fmov s1, w1
+; CHECK-NEXT: fmov d2, x0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: sqdmlal d1, s2, v0.s[1]
-; CHECK-NEXT: fmov x0, d1
+; CHECK-NEXT: sqdmlal d2, s1, v0.s[1]
+; CHECK-NEXT: fmov x0, d2
; CHECK-NEXT: ret
%rhs = extractelement <2 x i32> %C, i32 1
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
@@ -1835,11 +1824,11 @@ declare i64 @llvm.aarch64.neon.sqadd.i64(i64, i64)
define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind {
; CHECK-LABEL: sqdmlsl_lane_1d:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: fmov s2, w1
+; CHECK-NEXT: fmov s1, w1
+; CHECK-NEXT: fmov d2, x0
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: sqdmlsl d1, s2, v0.s[1]
-; CHECK-NEXT: fmov x0, d1
+; CHECK-NEXT: sqdmlsl d2, s1, v0.s[1]
+; CHECK-NEXT: fmov x0, d2
; CHECK-NEXT: ret
%rhs = extractelement <2 x i32> %C, i32 1
%prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs)
@@ -2895,11 +2884,18 @@ define <8 x i16> @pmull_from_extract_dup_low(<16 x i8> %lhs, i8 %rhs) {
}
define <8 x i16> @pmull_from_extract_dup_high(<16 x i8> %lhs, i8 %rhs) {
-; CHECK-LABEL: pmull_from_extract_dup_high:
-; CHECK: // %bb.0:
-; CHECK-NEXT: dup v1.16b, w0
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pmull_from_extract_dup_high:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: dup v1.16b, w0
+; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pmull_from_extract_dup_high:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: dup v1.8b, w0
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%rhsvec.0 = insertelement <8 x i8> undef, i8 %rhs, i32 0
%rhsvec = shufflevector <8 x i8> %rhsvec.0, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -2924,12 +2920,20 @@ define <8 x i16> @pmull_from_extract_duplane_low(<16 x i8> %lhs, <8 x i8> %rhs)
}
define <8 x i16> @pmull_from_extract_duplane_high(<16 x i8> %lhs, <8 x i8> %rhs) {
-; CHECK-LABEL: pmull_from_extract_duplane_high:
-; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: dup v1.16b, v1.b[0]
-; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: pmull_from_extract_duplane_high:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-SD-NEXT: dup v1.16b, v1.b[0]
+; CHECK-SD-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: pmull_from_extract_duplane_high:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8b, v1.b[0]
+; CHECK-GI-NEXT: pmull v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%lhs.high = shufflevector <16 x i8> %lhs, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%rhs.high = shufflevector <8 x i8> %rhs, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
@@ -3183,14 +3187,23 @@ define <1 x double> @test_fdiv_v1f64(<1 x double> %L, <1 x double> %R) nounwind
}
define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind {
-; CHECK-LABEL: sqdmlal_s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: fmov s1, w1
-; CHECK-NEXT: fmov s2, w2
-; CHECK-NEXT: sqdmlal s2, h0, v1.h[0]
-; CHECK-NEXT: fmov w0, s2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sqdmlal_s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w2
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s2, w1
+; CHECK-SD-NEXT: sqdmlal s0, h1, v2.h[0]
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sqdmlal_s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: fmov s2, w2
+; CHECK-GI-NEXT: sqdmlal s2, h0, v1.h[0]
+; CHECK-GI-NEXT: fmov w0, s2
+; CHECK-GI-NEXT: ret
%tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0
%tmp2 = insertelement <4 x i16> undef, i16 %B, i64 0
%tmp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -3202,11 +3215,11 @@ define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind {
define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
; CHECK-LABEL: sqdmlal_d:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x2
+; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: fmov s2, w1
-; CHECK-NEXT: sqdmlal d0, s1, s2
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov d2, x2
+; CHECK-NEXT: sqdmlal d2, s1, s0
+; CHECK-NEXT: fmov x0, d2
; CHECK-NEXT: ret
%tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
%tmp5 = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %C, i64 %tmp4)
@@ -3214,14 +3227,23 @@ define i64 @sqdmlal_d(i32 %A, i32 %B, i64 %C) nounwind {
}
define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind {
-; CHECK-LABEL: sqdmlsl_s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: fmov s1, w1
-; CHECK-NEXT: fmov s2, w2
-; CHECK-NEXT: sqdmlsl s2, h0, v1.h[0]
-; CHECK-NEXT: fmov w0, s2
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: sqdmlsl_s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov s0, w2
+; CHECK-SD-NEXT: fmov s1, w0
+; CHECK-SD-NEXT: fmov s2, w1
+; CHECK-SD-NEXT: sqdmlsl s0, h1, v2.h[0]
+; CHECK-SD-NEXT: fmov w0, s0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: sqdmlsl_s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: fmov s2, w2
+; CHECK-GI-NEXT: sqdmlsl s2, h0, v1.h[0]
+; CHECK-GI-NEXT: fmov w0, s2
+; CHECK-GI-NEXT: ret
%tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0
%tmp2 = insertelement <4 x i16> undef, i16 %B, i64 0
%tmp3 = tail call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2)
@@ -3233,11 +3255,11 @@ define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind {
define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
; CHECK-LABEL: sqdmlsl_d:
; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x2
+; CHECK-NEXT: fmov s0, w1
; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: fmov s2, w1
-; CHECK-NEXT: sqdmlsl d0, s1, s2
-; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: fmov d2, x2
+; CHECK-NEXT: sqdmlsl d2, s1, s0
+; CHECK-NEXT: fmov x0, d2
; CHECK-NEXT: ret
%tmp4 = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %A, i32 %B)
%tmp5 = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %C, i64 %tmp4)
@@ -3245,21 +3267,35 @@ define i64 @sqdmlsl_d(i32 %A, i32 %B, i64 %C) nounwind {
}
define <16 x i8> @test_pmull_64(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: test_pmull_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_pmull_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: ret
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
ret <16 x i8> %val
}
define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
-; CHECK-LABEL: test_pmull_high_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_pmull_high_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: pmull2 v0.1q, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_pmull_high_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: pmull v0.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: ret
%l_hi = extractelement <2 x i64> %l, i32 1
%r_hi = extractelement <2 x i64> %r, i32 1
%val = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l_hi, i64 %r_hi)
@@ -3267,13 +3303,22 @@ define <16 x i8> @test_pmull_high_64(<2 x i64> %l, <2 x i64> %r) nounwind {
}
define <16 x i8> @test_commutable_pmull_64(i64 %l, i64 %r) nounwind {
-; CHECK-LABEL: test_commutable_pmull_64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: fmov d0, x1
-; CHECK-NEXT: fmov d1, x0
-; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d
-; CHECK-NEXT: add v0.16b, v0.16b, v0.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_commutable_pmull_64:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fmov d0, x1
+; CHECK-SD-NEXT: fmov d1, x0
+; CHECK-SD-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-SD-NEXT: add v0.16b, v0.16b, v0.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_commutable_pmull_64:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fmov d0, x0
+; CHECK-GI-NEXT: fmov d1, x1
+; CHECK-GI-NEXT: pmull v2.1q, v0.1d, v1.1d
+; CHECK-GI-NEXT: pmull v0.1q, v1.1d, v0.1d
+; CHECK-GI-NEXT: add v0.16b, v2.16b, v0.16b
+; CHECK-GI-NEXT: ret
%1 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %l, i64 %r)
%2 = call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %r, i64 %l)
%3 = add <16 x i8> %1, %2