about summary refs log tree commit diff
path: root/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll')
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll 285
1 files changed, 206 insertions, 79 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
index cc9732b..6c7ddd9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=arm64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
@@ -197,11 +198,20 @@ define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
}
define <2 x i32> @test_sabd_v2i32_const() {
-; CHECK-LABEL: test_sabd_v2i32_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_sabd_v2i32_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI19_0
+; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI19_0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sabd_v2i32_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI19_1
+; CHECK-GI-NEXT: adrp x9, .LCPI19_0
+; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI19_1]
+; CHECK-GI-NEXT: ldr d1, [x9, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT: sabd v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
%1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
<2 x i32> <i32 -2147483648, i32 2147450880>,
<2 x i32> <i32 -65536, i32 65535>)
@@ -293,15 +303,26 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
}
define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK-LABEL: test_uabd_knownbits_vec8i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: rev64 v0.8h, v0.8h
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_uabd_knownbits_vec8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_uabd_knownbits_vec8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%and1 = and <8 x i16> %lhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and2 = and <8 x i16> %rhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%uabd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %and1, <8 x i16> %and2)
@@ -311,11 +332,22 @@ define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
}
define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ushr v0.4s, v0.4s, #17
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #17
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: rev64 v0.4s, v0.4s
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
%2 = and <4 x i32> %a1, <i32 65535, i32 65535, i32 65535, i32 65535>
%3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
@@ -325,10 +357,19 @@ define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32>
}
define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_mask_and_shuffle_lshr:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_mask_and_shuffle_lshr:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_mask_and_shuffle_lshr:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.4s, #127, msl #8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%2 = and <4 x i32> %a1, <i32 32767, i32 32767, i32 32767, i32 32767>
%3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
@@ -338,20 +379,36 @@ define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1)
}
define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: test_sabd_knownbits_vec4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI31_0
-; CHECK-NEXT: adrp x9, .LCPI31_1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff
-; CHECK-NEXT: mov v0.s[1], v0.s[0]
-; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_sabd_knownbits_vec4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI31_0
+; CHECK-SD-NEXT: adrp x9, .LCPI31_1
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
+; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-SD-NEXT: mov v0.s[1], v0.s[0]
+; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sabd_knownbits_vec4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI31_2
+; CHECK-GI-NEXT: adrp x9, .LCPI31_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_2]
+; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI31_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
+; CHECK-GI-NEXT: movi v3.2d, #0x0000ff000000ff
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: ret
%and1 = and <4 x i32> %lhs, <i32 255, i32 -1, i32 -1, i32 255>
%and2 = and <4 x i32> %rhs, <i32 255, i32 255, i32 -1, i32 -1>
%abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %and1, <4 x i32> %and2)
@@ -361,15 +418,27 @@ define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
}
define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI32_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI32_0
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI32_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 255, i32 4085>
%3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %1, <4 x i32> %2)
@@ -378,10 +447,25 @@ define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_or_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_or_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_or_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI33_1
+; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
%3 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -392,18 +476,33 @@ define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_xor_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI34_0
-; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
-; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_xor_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI34_0
+; CHECK-SD-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: eor v0.16b, v0.16b, v3.16b
+; CHECK-SD-NEXT: eor v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_xor_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI34_1
+; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: eor v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: eor v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
%3 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -414,10 +513,24 @@ define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_shl_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_shl_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_shl_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI35_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #17
+; CHECK-GI-NEXT: shl v1.4s, v1.4s, #17
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
%2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
%3 = and <4 x i32> %a1, <i32 -65536, i32 -7, i32 -7, i32 -65536>
@@ -428,18 +541,32 @@ define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_mul_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_mul_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI36_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
-; CHECK-NEXT: and v3.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v2.16b, v1.16b, v2.16b
-; CHECK-NEXT: mul v0.4s, v0.4s, v3.4s
-; CHECK-NEXT: mul v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: mov v0.s[1], v0.s[0]
-; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_mul_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI36_0
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-SD-NEXT: and v3.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v2.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: mul v0.4s, v0.4s, v3.4s
+; CHECK-SD-NEXT: mul v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: mov v0.s[1], v0.s[0]
+; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_mul_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI36_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI36_0
+; CHECK-GI-NEXT: and v3.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v2.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s
+; CHECK-GI-NEXT: mul v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
%2 = mul <4 x i32> %a0, %1
%3 = and <4 x i32> %a1, <i32 -65536, i32 -7, i32 -7, i32 -65536>