aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll')
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll 805
1 files changed, 581 insertions, 224 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index bd28d13..256ff94 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -1,5 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
@@ -101,11 +107,18 @@ entry:
}
define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddl_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddl_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddl_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
@@ -229,11 +242,18 @@ entry:
}
define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddl_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddl_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddl_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -345,11 +365,18 @@ entry:
}
define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddw_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddw_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddw_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %b to <8 x i16>
%add.i = add <8 x i16> %vmovl.i.i, %a
@@ -458,11 +485,18 @@ entry:
}
define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddw_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddw_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddw_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -574,11 +608,18 @@ entry:
}
define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubl_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubl_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubl_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
@@ -702,11 +743,18 @@ entry:
}
define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubl_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubl2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubl_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubl2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubl_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubl2 v0.8h, v0.16b, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -818,11 +866,18 @@ entry:
}
define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubw_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubw_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubw_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubw v0.8h, v0.8h, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %b to <8 x i16>
%sub.i = sub <8 x i16> %a, %vmovl.i.i
@@ -931,11 +986,18 @@ entry:
}
define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubw_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubw_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubw2 v0.8h, v0.8h, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubw_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubw2 v0.8h, v0.8h, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -975,10 +1037,16 @@ entry:
}
define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -987,10 +1055,16 @@ entry:
}
define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -999,10 +1073,16 @@ entry:
}
define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1011,10 +1091,16 @@ entry:
}
define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1023,10 +1109,16 @@ entry:
}
define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1035,10 +1127,16 @@ entry:
}
define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1047,11 +1145,20 @@ entry:
}
define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <8 x i16> %a, %b
%vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1064,11 +1171,20 @@ entry:
}
define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <4 x i32> %a, %b
%vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1081,11 +1197,20 @@ entry:
}
define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <2 x i64> %a, %b
%vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
@@ -1098,11 +1223,20 @@ entry:
}
define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <8 x i16> %a, %b
%vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1115,11 +1249,20 @@ entry:
}
define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <4 x i32> %a, %b
%vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1132,11 +1275,20 @@ entry:
}
define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <2 x i64> %a, %b
%vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
@@ -1209,11 +1361,19 @@ entry:
}
define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1224,11 +1384,19 @@ entry:
}
define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1239,11 +1407,19 @@ entry:
}
define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -1254,11 +1430,19 @@ entry:
}
define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1269,11 +1453,19 @@ entry:
}
define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1284,11 +1476,19 @@ entry:
}
define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -1299,10 +1499,16 @@ entry:
}
define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1311,10 +1517,16 @@ entry:
}
define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1323,10 +1535,16 @@ entry:
}
define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1335,10 +1553,16 @@ entry:
}
define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1347,10 +1571,16 @@ entry:
}
define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1359,10 +1589,16 @@ entry:
}
define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1371,11 +1607,20 @@ entry:
}
define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <8 x i16> %a, %b
%vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1388,11 +1633,20 @@ entry:
}
define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <4 x i32> %a, %b
%vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1405,11 +1659,20 @@ entry:
}
define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <2 x i64> %a, %b
%vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
@@ -1422,11 +1685,20 @@ entry:
}
define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <8 x i16> %a, %b
%vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1439,11 +1711,20 @@ entry:
}
define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <4 x i32> %a, %b
%vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1456,11 +1737,20 @@ entry:
}
define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <2 x i64> %a, %b
%vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
@@ -1533,11 +1823,19 @@ entry:
}
define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1548,11 +1846,19 @@ entry:
}
define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1563,11 +1869,19 @@ entry:
}
define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -1578,11 +1892,19 @@ entry:
}
define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1593,11 +1915,19 @@ entry:
}
define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1608,11 +1938,19 @@ entry:
}
define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -2535,21 +2873,40 @@ entry:
}
define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
-; CHECK-LABEL: cmplx_mul_combined_re_im:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsr x8, x0, #16
-; CHECK-NEXT: movi v1.2d, #0xffff0000ffff0000
-; CHECK-NEXT: rev32 v4.8h, v0.8h
-; CHECK-NEXT: dup v2.8h, w8
-; CHECK-NEXT: sqneg v3.8h, v2.8h
-; CHECK-NEXT: bsl v1.16b, v2.16b, v3.16b
-; CHECK-NEXT: fmov d3, x0
-; CHECK-NEXT: sqdmull v2.4s, v4.4h, v1.4h
-; CHECK-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h
-; CHECK-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0]
-; CHECK-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0]
-; CHECK-NEXT: uzp2 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cmplx_mul_combined_re_im:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: lsr x8, x0, #16
+; CHECK-SD-NEXT: movi v1.2d, #0xffff0000ffff0000
+; CHECK-SD-NEXT: rev32 v4.8h, v0.8h
+; CHECK-SD-NEXT: dup v2.8h, w8
+; CHECK-SD-NEXT: sqneg v3.8h, v2.8h
+; CHECK-SD-NEXT: bsl v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: fmov d3, x0
+; CHECK-SD-NEXT: sqdmull v2.4s, v4.4h, v1.4h
+; CHECK-SD-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h
+; CHECK-SD-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0]
+; CHECK-SD-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0]
+; CHECK-SD-NEXT: uzp2 v0.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: lsr x9, x0, #16
+; CHECK-GI-NEXT: adrp x8, .LCPI196_0
+; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-GI-NEXT: fmov d1, x9
+; CHECK-GI-NEXT: dup v2.8h, v1.h[0]
+; CHECK-GI-NEXT: sqneg v1.8h, v2.8h
+; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: fmov d3, x0
+; CHECK-GI-NEXT: sqdmull v2.4s, v2.4h, v3.h[0]
+; CHECK-GI-NEXT: sqdmull v5.4s, v4.4h, v1.4h
+; CHECK-GI-NEXT: sqdmlal v5.4s, v0.4h, v3.h[0]
+; CHECK-GI-NEXT: sqdmlal2 v2.4s, v4.8h, v1.8h
+; CHECK-GI-NEXT: uzp2 v0.8h, v5.8h, v2.8h
+; CHECK-GI-NEXT: ret
entry:
%scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>