aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AArch64
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AArch64')
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll10
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir109
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir11
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll4
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-mops.ll188
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-smull.ll67
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll107
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll805
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll285
-rw-r--r--llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir98
-rw-r--r--llvm/test/CodeGen/AArch64/combine-sdiv.ll1
-rw-r--r--llvm/test/CodeGen/AArch64/freeze.ll34
-rw-r--r--llvm/test/CodeGen/AArch64/highextractbitcast.ll172
-rw-r--r--llvm/test/CodeGen/AArch64/neon-saba.ll93
-rw-r--r--llvm/test/CodeGen/AArch64/rem-by-const.ll1
-rw-r--r--llvm/test/CodeGen/AArch64/stack-tagging.ll50
-rw-r--r--llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll24
-rw-r--r--llvm/test/CodeGen/AArch64/wineh-reuse-catch-alloca.ll100
18 files changed, 1446 insertions, 713 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll
index 55cf48e..d1a6584a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-switch-split.ll
@@ -9,7 +9,7 @@ declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #0
declare i32 @logg(...)
-define i32 @scanfile(i32 %call148) {
+define i32 @scanfile(i32 %call148, ptr %p) {
; CHECK-LABEL: scanfile:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
@@ -26,7 +26,7 @@ define i32 @scanfile(i32 %call148) {
; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_3: ; %entry
-; CHECK-NEXT: b.eq LBB0_2
+; CHECK-NEXT: b.eq LBB0_10
; CHECK-NEXT: ; %bb.4: ; %entry
; CHECK-NEXT: cmp w8, #2
; CHECK-NEXT: b.eq LBB0_6
@@ -46,6 +46,10 @@ define i32 @scanfile(i32 %call148) {
; CHECK-NEXT: LBB0_9: ; %sw.bb150
; CHECK-NEXT: bl _logg
; CHECK-NEXT: brk #0x1
+; CHECK-NEXT: LBB0_10: ; %sw.bb178
+; CHECK-NEXT: str wzr, [x1]
+; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-NEXT: ret
entry:
switch i32 %call148, label %common.ret [
i32 -1, label %sw.bb
@@ -80,7 +84,7 @@ sw.bb152: ; preds = %entry
br label %common.ret
sw.bb178: ; preds = %entry
- call void @llvm.lifetime.start.p0(i64 0, ptr null)
+ store i32 0, ptr %p
br label %common.ret
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir
new file mode 100644
index 0000000..8552931
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-ashr.mir
@@ -0,0 +1,109 @@
+# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple aarch64 -passes="print<gisel-value-tracking>" %s -o - 2>&1 | FileCheck %s
+
+---
+name: Cst
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @Cst
+ ; CHECK-NEXT: %0:_ KnownBits:10000000 SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:11110000 SignBits:4
+ %0:_(s8) = G_CONSTANT i8 128
+ %1:_(s8) = G_CONSTANT i8 3
+ %2:_(s8) = G_ASHR %0, %1
+...
+---
+name: CstBig
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @CstBig
+ ; CHECK-NEXT: %0:_ KnownBits:11111000 SignBits:5
+ ; CHECK-NEXT: %1:_ KnownBits:00000110 SignBits:5
+ ; CHECK-NEXT: %2:_ KnownBits:11111111 SignBits:8
+ %0:_(s8) = G_CONSTANT i8 248
+ %1:_(s8) = G_CONSTANT i8 6
+ %2:_(s8) = G_ASHR %0, %1
+...
+---
+name: ScalarVar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarVar
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = COPY $b1
+ %2:_(s8) = G_ASHR %0, %1
+...
+---
+name: ScalarCst
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @ScalarCst
+ ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6
+ ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:4
+ %0:_(s8) = COPY $b0
+ %1:_(s8) = G_CONSTANT i8 3
+ %2:_(s8) = G_ASHR %0, %1
+...
+---
+name: VectorVar
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorVar
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:1
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(<4 x s16>) = COPY $d1
+ %2:_(<4 x s16>) = G_ASHR %0, %1
+...
+---
+name: VectorCst
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCst
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:4
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = G_CONSTANT i16 3
+ %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %3:_(<4 x s16>) = G_ASHR %0, %2
+...
+---
+name: VectorCst36
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCst36
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000110 SignBits:13
+ ; CHECK-NEXT: %3:_ KnownBits:0000000000000?1? SignBits:13
+ ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:4
+ %0:_(<4 x s16>) = COPY $d0
+ %1:_(s16) = G_CONSTANT i16 3
+ %2:_(s16) = G_CONSTANT i16 6
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1
+ %4:_(<4 x s16>) = G_ASHR %0, %3
+...
+---
+name: VectorCst3unknown
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: @VectorCst3unknown
+ ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14
+ ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1
+ ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1
+ %0:_(<4 x s16>) = COPY $d0
+ %2:_(s16) = COPY $h0
+ %1:_(s16) = G_CONSTANT i16 3
+ %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1
+ %4:_(<4 x s16>) = G_ASHR %0, %3
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
index bd2d8c09..5c164bf 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -71,12 +71,13 @@
# DEBUG-NEXT: .. the first uncovered imm index: 0, OK
#
# DEBUG-NEXT: G_ABDS (opcode 65): 1 type index, 0 imm indices
-# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
-# DEBUG-NEXT:G_ABDU (opcode 66): 1 type index, 0 imm indices
-# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined
-# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_ABDU (opcode 66): 1 type index, 0 imm indices
+# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
+# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
#
# DEBUG-NEXT: G_IMPLICIT_DEF (opcode {{[0-9]+}}): 1 type index, 0 imm indices
# DEBUG-NEXT: .. the first uncovered type index: {{[0-9]+}}, OK
diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
index be79135..747db39 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll
@@ -14,10 +14,10 @@ define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-GI-LABEL: dupsext_v8i8_v8i16:
; CHECK-GI: // %bb.0: // %entry
; CHECK-GI-NEXT: lsl w8, w0, #8
-; CHECK-GI-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: dup v1.8h, w8
-; CHECK-GI-NEXT: mul v0.8h, v1.8h, v0.8h
+; CHECK-GI-NEXT: xtn v1.8b, v1.8h
+; CHECK-GI-NEXT: smull v0.8h, v1.8b, v0.8b
; CHECK-GI-NEXT: ret
entry:
%in = sext i8 %src to i16
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
index ff7872c..83530049a 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll
@@ -87,46 +87,17 @@ entry:
}
define void @memset_10_zeroval_volatile(ptr %dst) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w1, wzr
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: mov x9, xzr
-; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: str xzr, [x0]
+; GISel-WITHOUT-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memset_10_zeroval_volatile:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: str xzr, [x0]
+; GISel-MOPS-NEXT: strh wzr, [x0, #8]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_zeroval_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -490,43 +461,46 @@ entry:
define void @memset_10_volatile(ptr %dst, i32 %value) {
; GISel-WITHOUT-MOPS-O0-LABEL: memset_10_volatile:
; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT: // implicit-def: $x8
+; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, w1
+; GISel-WITHOUT-MOPS-O0-NEXT: and x8, x8, #0xff
+; GISel-WITHOUT-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101
+; GISel-WITHOUT-MOPS-O0-NEXT: mul x8, x8, x9
+; GISel-WITHOUT-MOPS-O0-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8
+; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-WITHOUT-MOPS-O0-NEXT: ret
;
; GISel-WITHOUT-MOPS-O3-LABEL: memset_10_volatile:
; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memset
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1
+; GISel-WITHOUT-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; GISel-WITHOUT-MOPS-O3-NEXT: and x9, x1, #0xff
+; GISel-WITHOUT-MOPS-O3-NEXT: mul x8, x9, x8
+; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: strh w8, [x0, #8]
; GISel-WITHOUT-MOPS-O3-NEXT: ret
;
; GISel-MOPS-O0-LABEL: memset_10_volatile:
; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: // implicit-def: $x9
-; GISel-MOPS-O0-NEXT: mov w9, w1
-; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: setm [x0]!, x8!, x9
-; GISel-MOPS-O0-NEXT: sete [x0]!, x8!, x9
+; GISel-MOPS-O0-NEXT: // implicit-def: $x8
+; GISel-MOPS-O0-NEXT: mov w8, w1
+; GISel-MOPS-O0-NEXT: and x8, x8, #0xff
+; GISel-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101
+; GISel-MOPS-O0-NEXT: mul x8, x8, x9
+; GISel-MOPS-O0-NEXT: str x8, [x0]
+; GISel-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8
+; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-MOPS-O0-NEXT: ret
;
; GISel-MOPS-O3-LABEL: memset_10_volatile:
; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1
-; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, x1
-; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1
-; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, x1
+; GISel-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101
+; GISel-MOPS-O3-NEXT: and x9, x1, #0xff
+; GISel-MOPS-O3-NEXT: mul x8, x9, x8
+; GISel-MOPS-O3-NEXT: str x8, [x0]
+; GISel-MOPS-O3-NEXT: strh w8, [x0, #8]
; GISel-MOPS-O3-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile:
@@ -905,43 +879,21 @@ entry:
}
define void @memcpy_10_volatile(ptr %dst, ptr %src, i32 %value) {
-; GISel-WITHOUT-MOPS-O0-LABEL: memcpy_10_volatile:
-; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memcpy
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O0-NEXT: ret
-;
-; GISel-WITHOUT-MOPS-O3-LABEL: memcpy_10_volatile:
-; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memcpy
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
-; GISel-WITHOUT-MOPS-O3-NEXT: ret
-;
-; GISel-MOPS-O0-LABEL: memcpy_10_volatile:
-; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: cpyfp [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpyfm [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpyfe [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: ret
+; GISel-WITHOUT-MOPS-LABEL: memcpy_10_volatile:
+; GISel-WITHOUT-MOPS: // %bb.0: // %entry
+; GISel-WITHOUT-MOPS-NEXT: ldr x8, [x1]
+; GISel-WITHOUT-MOPS-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-NEXT: ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-NEXT: strh w8, [x0, #8]
+; GISel-WITHOUT-MOPS-NEXT: ret
;
-; GISel-MOPS-O3-LABEL: memcpy_10_volatile:
-; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT: cpyfp [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: ret
+; GISel-MOPS-LABEL: memcpy_10_volatile:
+; GISel-MOPS: // %bb.0: // %entry
+; GISel-MOPS-NEXT: ldr x8, [x1]
+; GISel-MOPS-NEXT: str x8, [x0]
+; GISel-MOPS-NEXT: ldrh w8, [x1, #8]
+; GISel-MOPS-NEXT: strh w8, [x0, #8]
+; GISel-MOPS-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memcpy_10_volatile:
; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry
@@ -1736,40 +1688,34 @@ entry:
define void @memmove_10_volatile(ptr %dst, ptr %src, i32 %value) {
; GISel-WITHOUT-MOPS-O0-LABEL: memmove_10_volatile:
; GISel-WITHOUT-MOPS-O0: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8
-; GISel-WITHOUT-MOPS-O0-NEXT: bl memmove
-; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O0-NEXT: ldr x9, [x1]
+; GISel-WITHOUT-MOPS-O0-NEXT: ldrh w8, [x1, #8]
+; GISel-WITHOUT-MOPS-O0-NEXT: str x9, [x0]
+; GISel-WITHOUT-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-WITHOUT-MOPS-O0-NEXT: ret
;
; GISel-WITHOUT-MOPS-O3-LABEL: memmove_10_volatile:
; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry
-; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16
-; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16
-; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa
-; GISel-WITHOUT-MOPS-O3-NEXT: bl memmove
-; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; GISel-WITHOUT-MOPS-O3-NEXT: ldr x8, [x1]
+; GISel-WITHOUT-MOPS-O3-NEXT: ldrh w9, [x1, #8]
+; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0]
+; GISel-WITHOUT-MOPS-O3-NEXT: strh w9, [x0, #8]
; GISel-WITHOUT-MOPS-O3-NEXT: ret
;
; GISel-MOPS-O0-LABEL: memmove_10_volatile:
; GISel-MOPS-O0: // %bb.0: // %entry
-; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8
-; GISel-MOPS-O0-NEXT: cpyp [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpym [x0]!, [x1]!, x8!
-; GISel-MOPS-O0-NEXT: cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O0-NEXT: ldr x9, [x1]
+; GISel-MOPS-O0-NEXT: ldrh w8, [x1, #8]
+; GISel-MOPS-O0-NEXT: str x9, [x0]
+; GISel-MOPS-O0-NEXT: strh w8, [x0, #8]
; GISel-MOPS-O0-NEXT: ret
;
; GISel-MOPS-O3-LABEL: memmove_10_volatile:
; GISel-MOPS-O3: // %bb.0: // %entry
-; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa
-; GISel-MOPS-O3-NEXT: cpyp [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8!
-; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8!
+; GISel-MOPS-O3-NEXT: ldr x8, [x1]
+; GISel-MOPS-O3-NEXT: ldrh w9, [x1, #8]
+; GISel-MOPS-O3-NEXT: str x8, [x0]
+; GISel-MOPS-O3-NEXT: strh w9, [x0, #8]
; GISel-MOPS-O3-NEXT: ret
;
; SDAG-WITHOUT-MOPS-O2-LABEL: memmove_10_volatile:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index 2f23a32..6e5c666 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -2264,33 +2264,12 @@ define <2 x i64> @lsr_const(<2 x i64> %a, <2 x i64> %b) {
}
define <2 x i64> @asr(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-NEON-LABEL: asr:
-; CHECK-NEON: // %bb.0:
-; CHECK-NEON-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-NEON-NEXT: shrn v1.2s, v1.2d, #32
-; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: ret
-;
-; CHECK-SVE-LABEL: asr:
-; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-SVE-NEXT: shrn v1.2s, v1.2d, #32
-; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: ret
-;
-; CHECK-GI-LABEL: asr:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32
-; CHECK-GI-NEXT: sshr v1.2d, v1.2d, #32
-; CHECK-GI-NEXT: fmov x10, d0
-; CHECK-GI-NEXT: fmov x11, d1
-; CHECK-GI-NEXT: mov x8, v0.d[1]
-; CHECK-GI-NEXT: mov x9, v1.d[1]
-; CHECK-GI-NEXT: mul x10, x10, x11
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: fmov d0, x10
-; CHECK-GI-NEXT: mov v0.d[1], x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: asr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
%x = ashr <2 x i64> %a, <i64 32, i64 32>
%y = ashr <2 x i64> %b, <i64 32, i64 32>
%z = mul nsw <2 x i64> %x, %y
@@ -2298,34 +2277,12 @@ define <2 x i64> @asr(<2 x i64> %a, <2 x i64> %b) {
}
define <2 x i64> @asr_const(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-NEON-LABEL: asr_const:
-; CHECK-NEON: // %bb.0:
-; CHECK-NEON-NEXT: movi v1.2s, #31
-; CHECK-NEON-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-NEON-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEON-NEXT: ret
-;
-; CHECK-SVE-LABEL: asr_const:
-; CHECK-SVE: // %bb.0:
-; CHECK-SVE-NEXT: movi v1.2s, #31
-; CHECK-SVE-NEXT: shrn v0.2s, v0.2d, #32
-; CHECK-SVE-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-SVE-NEXT: ret
-;
-; CHECK-GI-LABEL: asr_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI81_0
-; CHECK-GI-NEXT: sshr v0.2d, v0.2d, #32
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI81_0]
-; CHECK-GI-NEXT: fmov x10, d0
-; CHECK-GI-NEXT: fmov x11, d1
-; CHECK-GI-NEXT: mov x8, v0.d[1]
-; CHECK-GI-NEXT: mov x9, v1.d[1]
-; CHECK-GI-NEXT: mul x10, x10, x11
-; CHECK-GI-NEXT: mul x8, x8, x9
-; CHECK-GI-NEXT: fmov d0, x10
-; CHECK-GI-NEXT: mov v0.d[1], x8
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: asr_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v1.2s, #31
+; CHECK-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: ret
%x = ashr <2 x i64> %a, <i64 32, i64 32>
%z = mul nsw <2 x i64> %x, <i64 31, i64 31>
ret <2 x i64> %z
diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
index e31c9a0..113eb14 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll
@@ -263,3 +263,110 @@ entry:
%conv = zext i1 %cmp to i8
ret i8 %conv
}
+
+; Test ANDS.
+define i32 @test1_ands(i32 %a) {
+; CHECK-LABEL: test1_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0x3ffc00
+; CHECK-NEXT: ands w8, w8, #0xffe007ff
+; CHECK-NEXT: csel w0, w0, w8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 2098176
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %a, i32 %ands
+ ret i32 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i32 @test2_ands(i32 %a) {
+; CHECK-LABEL: test2_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w0, w8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 135
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %a, i32 %ands
+ ret i32 %r
+}
+
+; This constant should not be split because the split immediate is not valid
+; bitmask immediate.
+define i32 @test3_ands(i32 %a) {
+; CHECK-LABEL: test3_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: ands w8, w0, w8
+; CHECK-NEXT: csel w0, w0, w8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i32 %a, 2163712
+ %c = icmp eq i32 %ands, 0
+ %r = select i1 %c, i32 %a, i32 %ands
+ ret i32 %r
+}
+
+define i64 @test4_ands(i64 %a) {
+; CHECK-LABEL: test4_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and x8, x0, #0x3ffc00
+; CHECK-NEXT: ands x8, x8, #0xffffffffffe007ff
+; CHECK-NEXT: csel x0, x0, x8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 2098176
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %a, i64 %ands
+ ret i64 %r
+}
+
+define i64 @test5_ands(i64 %a) {
+; CHECK-LABEL: test5_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and x8, x0, #0x3ffffc000
+; CHECK-NEXT: ands x8, x8, #0xfffffffe00007fff
+; CHECK-NEXT: csel x0, x0, x8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 8589950976
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %a, i64 %ands
+ ret i64 %r
+}
+
+; This constant should not be split because it can be handled by one mov.
+define i64 @test6_ands(i64 %a) {
+; CHECK-LABEL: test6_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x0, x8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 135
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %a, i64 %ands
+ ret i64 %r
+}
+
+; This constant should not be split because the split immediate is not valid
+; bitmask immediate.
+define i64 @test7_ands(i64 %a) {
+; CHECK-LABEL: test7_ands:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov w8, #1024 // =0x400
+; CHECK-NEXT: movk w8, #33, lsl #16
+; CHECK-NEXT: ands x8, x0, x8
+; CHECK-NEXT: csel x0, x0, x8, eq
+; CHECK-NEXT: ret
+entry:
+ %ands = and i64 %a, 2163712
+ %c = icmp eq i64 %ands, 0
+ %r = select i1 %c, i64 %a, i64 %ands
+ ret i64 %r
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
index bd28d13..256ff94 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -1,5 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon,+aes -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_vmull_p8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p8
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_p64
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_vmull_high_p64
declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <16 x i8> @llvm.aarch64.neon.pmull64(i64, i64) #5
@@ -101,11 +107,18 @@ entry:
}
define <8 x i16> @test_vaddl_a8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddl_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddl_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddl_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
@@ -229,11 +242,18 @@ entry:
}
define <8 x i16> @test_vaddl_high_a8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddl_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddl_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddl_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddl2 v0.8h, v0.16b, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -345,11 +365,18 @@ entry:
}
define <8 x i16> @test_vaddw_a8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vaddw_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddw_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddw_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddw v0.8h, v0.8h, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %b to <8 x i16>
%add.i = add <8 x i16> %vmovl.i.i, %a
@@ -458,11 +485,18 @@ entry:
}
define <8 x i16> @test_vaddw_high_a8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vaddw_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddw_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddw_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: uaddw2 v0.8h, v0.8h, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -574,11 +608,18 @@ entry:
}
define <8 x i16> @test_vsubl_a8(<8 x i8> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubl_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubl_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubl_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %a to <8 x i16>
%vmovl.i2.i = zext <8 x i8> %b to <8 x i16>
@@ -702,11 +743,18 @@ entry:
}
define <8 x i16> @test_vsubl_high_a8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubl_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubl2 v0.8h, v0.16b, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubl_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubl2 v0.8h, v0.16b, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubl_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubl2 v0.8h, v0.16b, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %a, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -818,11 +866,18 @@ entry:
}
define <8 x i16> @test_vsubw_a8(<8 x i16> %a, <8 x i8> %b) {
-; CHECK-LABEL: test_vsubw_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubw_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubw v0.8h, v0.8h, v1.8b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubw_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubw v0.8h, v0.8h, v1.8b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%vmovl.i.i = zext <8 x i8> %b to <8 x i16>
%sub.i = sub <8 x i16> %a, %vmovl.i.i
@@ -931,11 +986,18 @@ entry:
}
define <8 x i16> @test_vsubw_high_a8(<8 x i16> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_vsubw_high_a8:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: usubw2 v0.8h, v0.8h, v1.16b
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubw_high_a8:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: usubw2 v0.8h, v0.8h, v1.16b
+; CHECK-SD-NEXT: bic v0.8h, #255, lsl #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubw_high_a8:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: movi v2.2d, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: usubw2 v0.8h, v0.8h, v1.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
entry:
%shuffle.i.i.i = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%0 = zext <8 x i8> %shuffle.i.i.i to <8 x i16>
@@ -975,10 +1037,16 @@ entry:
}
define <8 x i8> @test_vaddhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -987,10 +1055,16 @@ entry:
}
define <4 x i16> @test_vaddhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -999,10 +1073,16 @@ entry:
}
define <2 x i32> @test_vaddhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1011,10 +1091,16 @@ entry:
}
define <8 x i8> @test_vaddhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <8 x i16> %a, %b
%vaddhn1.i = lshr <8 x i16> %vaddhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1023,10 +1109,16 @@ entry:
}
define <4 x i16> @test_vaddhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <4 x i32> %a, %b
%vaddhn1.i = lshr <4 x i32> %vaddhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1035,10 +1127,16 @@ entry:
}
define <2 x i32> @test_vaddhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: addhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: addhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i = add <2 x i64> %a, %b
%vaddhn1.i = lshr <2 x i64> %vaddhn.i, <i64 32, i64 32>
@@ -1047,11 +1145,20 @@ entry:
}
define <16 x i8> @test_vaddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <8 x i16> %a, %b
%vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1064,11 +1171,20 @@ entry:
}
define <8 x i16> @test_vaddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <4 x i32> %a, %b
%vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1081,11 +1197,20 @@ entry:
}
define <4 x i32> @test_vaddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <2 x i64> %a, %b
%vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
@@ -1098,11 +1223,20 @@ entry:
}
define <16 x i8> @test_vaddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vaddhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <8 x i16> %a, %b
%vaddhn1.i.i = lshr <8 x i16> %vaddhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1115,11 +1249,20 @@ entry:
}
define <8 x i16> @test_vaddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vaddhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <4 x i32> %a, %b
%vaddhn1.i.i = lshr <4 x i32> %vaddhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1132,11 +1275,20 @@ entry:
}
define <4 x i32> @test_vaddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vaddhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: addhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vaddhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: addhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vaddhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: add v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vaddhn.i.i = add <2 x i64> %a, %b
%vaddhn1.i.i = lshr <2 x i64> %vaddhn.i.i, <i64 32, i64 32>
@@ -1209,11 +1361,19 @@ entry:
}
define <16 x i8> @test_vraddhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1224,11 +1384,19 @@ entry:
}
define <8 x i16> @test_vraddhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1239,11 +1407,19 @@ entry:
}
define <4 x i32> @test_vraddhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -1254,11 +1430,19 @@ entry:
}
define <16 x i8> @test_vraddhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vraddhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.raddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1269,11 +1453,19 @@ entry:
}
define <8 x i16> @test_vraddhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vraddhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.raddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1284,11 +1476,19 @@ entry:
}
define <4 x i32> @test_vraddhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vraddhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vraddhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: raddhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vraddhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: raddhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vraddhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.raddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -1299,10 +1499,16 @@ entry:
}
define <8 x i8> @test_vsubhn_s16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1311,10 +1517,16 @@ entry:
}
define <4 x i16> @test_vsubhn_s32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1323,10 +1535,16 @@ entry:
}
define <2 x i32> @test_vsubhn_s64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1335,10 +1553,16 @@ entry:
}
define <8 x i8> @test_vsubhn_u16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.8b, v0.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.8b, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: shrn v0.8b, v0.8h, #8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <8 x i16> %a, %b
%vsubhn1.i = lshr <8 x i16> %vsubhn.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1347,10 +1571,16 @@ entry:
}
define <4 x i16> @test_vsubhn_u32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.4h, v0.4s, v1.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.4h, v0.4s, v1.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #16
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <4 x i32> %a, %b
%vsubhn1.i = lshr <4 x i32> %vsubhn.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1359,10 +1589,16 @@ entry:
}
define <2 x i32> @test_vsubhn_u64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: subhn v0.2s, v0.2d, v1.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: subhn v0.2s, v0.2d, v1.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v0.2d, v0.2d, v1.2d
+; CHECK-GI-NEXT: shrn v0.2s, v0.2d, #32
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i = sub <2 x i64> %a, %b
%vsubhn1.i = lshr <2 x i64> %vsubhn.i, <i64 32, i64 32>
@@ -1371,11 +1607,20 @@ entry:
}
define <16 x i8> @test_vsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <8 x i16> %a, %b
%vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1388,11 +1633,20 @@ entry:
}
define <8 x i16> @test_vsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <4 x i32> %a, %b
%vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1405,11 +1659,20 @@ entry:
}
define <4 x i32> @test_vsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <2 x i64> %a, %b
%vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
@@ -1422,11 +1685,20 @@ entry:
}
define <16 x i8> @test_vsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vsubhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.8b, v1.8h, #8
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <8 x i16> %a, %b
%vsubhn1.i.i = lshr <8 x i16> %vsubhn.i.i, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
@@ -1439,11 +1711,20 @@ entry:
}
define <8 x i16> @test_vsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vsubhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #16
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <4 x i32> %a, %b
%vsubhn1.i.i = lshr <4 x i32> %vsubhn.i.i, <i32 16, i32 16, i32 16, i32 16>
@@ -1456,11 +1737,20 @@ entry:
}
define <4 x i32> @test_vsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vsubhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: subhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vsubhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: subhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vsubhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sub v1.2d, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: shrn v1.2s, v1.2d, #32
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vsubhn.i.i = sub <2 x i64> %a, %b
%vsubhn1.i.i = lshr <2 x i64> %vsubhn.i.i, <i64 32, i64 32>
@@ -1533,11 +1823,19 @@ entry:
}
define <16 x i8> @test_vrsubhn_high_s16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_s16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_s16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1548,11 +1846,19 @@ entry:
}
define <8 x i16> @test_vrsubhn_high_s32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_s32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_s32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1563,11 +1869,19 @@ entry:
}
define <4 x i32> @test_vrsubhn_high_s64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_high_s64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_s64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_s64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -1578,11 +1892,19 @@ entry:
}
define <16 x i8> @test_vrsubhn_high_u16(<8 x i8> %r, <8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_u16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.16b, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_u16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.8b, v1.8h, v2.8h
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <8 x i8> @llvm.aarch64.neon.rsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
%0 = bitcast <8 x i8> %r to <1 x i64>
@@ -1593,11 +1915,19 @@ entry:
}
define <8 x i16> @test_vrsubhn_high_u32(<4 x i16> %r, <4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_u32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.8h, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_u32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.4h, v1.4s, v2.4s
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <4 x i16> @llvm.aarch64.neon.rsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
%0 = bitcast <4 x i16> %r to <1 x i64>
@@ -1608,11 +1938,19 @@ entry:
}
define <4 x i32> @test_vrsubhn_high_u64(<2 x i32> %r, <2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_vrsubhn_high_u64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_vrsubhn_high_u64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-SD-NEXT: rsubhn2 v0.4s, v1.2d, v2.2d
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_vrsubhn_high_u64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: rsubhn v1.2s, v1.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
entry:
%vrsubhn2.i.i = tail call <2 x i32> @llvm.aarch64.neon.rsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
%0 = bitcast <2 x i32> %r to <1 x i64>
@@ -2535,21 +2873,40 @@ entry:
}
define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
-; CHECK-LABEL: cmplx_mul_combined_re_im:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsr x8, x0, #16
-; CHECK-NEXT: movi v1.2d, #0xffff0000ffff0000
-; CHECK-NEXT: rev32 v4.8h, v0.8h
-; CHECK-NEXT: dup v2.8h, w8
-; CHECK-NEXT: sqneg v3.8h, v2.8h
-; CHECK-NEXT: bsl v1.16b, v2.16b, v3.16b
-; CHECK-NEXT: fmov d3, x0
-; CHECK-NEXT: sqdmull v2.4s, v4.4h, v1.4h
-; CHECK-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h
-; CHECK-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0]
-; CHECK-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0]
-; CHECK-NEXT: uzp2 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: cmplx_mul_combined_re_im:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: lsr x8, x0, #16
+; CHECK-SD-NEXT: movi v1.2d, #0xffff0000ffff0000
+; CHECK-SD-NEXT: rev32 v4.8h, v0.8h
+; CHECK-SD-NEXT: dup v2.8h, w8
+; CHECK-SD-NEXT: sqneg v3.8h, v2.8h
+; CHECK-SD-NEXT: bsl v1.16b, v2.16b, v3.16b
+; CHECK-SD-NEXT: fmov d3, x0
+; CHECK-SD-NEXT: sqdmull v2.4s, v4.4h, v1.4h
+; CHECK-SD-NEXT: sqdmull2 v1.4s, v4.8h, v1.8h
+; CHECK-SD-NEXT: sqdmlal v2.4s, v0.4h, v3.h[0]
+; CHECK-SD-NEXT: sqdmlal2 v1.4s, v0.8h, v3.h[0]
+; CHECK-SD-NEXT: uzp2 v0.8h, v2.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmplx_mul_combined_re_im:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: lsr x9, x0, #16
+; CHECK-GI-NEXT: adrp x8, .LCPI196_0
+; CHECK-GI-NEXT: rev32 v4.8h, v0.8h
+; CHECK-GI-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-GI-NEXT: fmov d1, x9
+; CHECK-GI-NEXT: dup v2.8h, v1.h[0]
+; CHECK-GI-NEXT: sqneg v1.8h, v2.8h
+; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
+; CHECK-GI-NEXT: mov d2, v0.d[1]
+; CHECK-GI-NEXT: fmov d3, x0
+; CHECK-GI-NEXT: sqdmull v2.4s, v2.4h, v3.h[0]
+; CHECK-GI-NEXT: sqdmull v5.4s, v4.4h, v1.4h
+; CHECK-GI-NEXT: sqdmlal v5.4s, v0.4h, v3.h[0]
+; CHECK-GI-NEXT: sqdmlal2 v2.4s, v4.8h, v1.8h
+; CHECK-GI-NEXT: uzp2 v0.8h, v5.8h, v2.8h
+; CHECK-GI-NEXT: ret
entry:
%scale.sroa.2.0.extract.shift23 = lshr i64 %scale.coerce, 16
%shuffle.i = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
index cc9732b..6c7ddd9 100644
--- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=arm64-none-linux-gnu -mattr=+neon < %s | FileCheck %s
+; RUN: llc -mtriple=arm64-none-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=arm64-none-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8>, <8 x i8>)
declare <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8>, <8 x i8>)
@@ -197,11 +198,20 @@ define <2 x i32> @test_sabd_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
}
define <2 x i32> @test_sabd_v2i32_const() {
-; CHECK-LABEL: test_sabd_v2i32_const:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_0
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_sabd_v2i32_const:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI19_0
+; CHECK-SD-NEXT: ldr d0, [x8, :lo12:.LCPI19_0]
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sabd_v2i32_const:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI19_1
+; CHECK-GI-NEXT: adrp x9, .LCPI19_0
+; CHECK-GI-NEXT: ldr d0, [x8, :lo12:.LCPI19_1]
+; CHECK-GI-NEXT: ldr d1, [x9, :lo12:.LCPI19_0]
+; CHECK-GI-NEXT: sabd v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
%1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(
<2 x i32> <i32 -2147483648, i32 2147450880>,
<2 x i32> <i32 -65536, i32 65535>)
@@ -293,15 +303,26 @@ define <2 x double> @test_fabd_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
}
define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
-; CHECK-LABEL: test_uabd_knownbits_vec8i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v2.8h, #15
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: uabd v0.8h, v0.8h, v1.8h
-; CHECK-NEXT: rev64 v0.8h, v0.8h
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_uabd_knownbits_vec8i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v2.8h, #15
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: rev64 v0.8h, v0.8h
+; CHECK-SD-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_uabd_knownbits_vec8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.8h, #15
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: uabd v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: rev64 v0.8h, v0.8h
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: ret
%and1 = and <8 x i16> %lhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%and2 = and <8 x i16> %rhs, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
%uabd = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %and1, <8 x i16> %and2)
@@ -311,11 +332,22 @@ define <8 x i16> @test_uabd_knownbits_vec8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
}
define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ushr v0.4s, v0.4s, #17
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ushr v0.4s, v0.4s, #17
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_uabd_mask_and_shuffle_lshr:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: rev64 v0.4s, v0.4s
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
%2 = and <4 x i32> %a1, <i32 65535, i32 65535, i32 65535, i32 65535>
%3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
@@ -325,10 +357,19 @@ define <4 x i32> @knownbits_uabd_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32>
}
define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_mask_and_shuffle_lshr:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_mask_and_shuffle_lshr:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_mask_and_shuffle_lshr:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: movi v2.4s, #127, msl #8
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ushr v0.4s, v0.4s, #17
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 32767, i32 32767, i32 32767, i32 32767>
%2 = and <4 x i32> %a1, <i32 32767, i32 32767, i32 32767, i32 32767>
%3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %1, <4 x i32> %2)
@@ -338,20 +379,36 @@ define <4 x i32> @knownbits_mask_and_shuffle_lshr(<4 x i32> %a0, <4 x i32> %a1)
}
define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
-; CHECK-LABEL: test_sabd_knownbits_vec4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI31_0
-; CHECK-NEXT: adrp x9, .LCPI31_1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff
-; CHECK-NEXT: mov v0.s[1], v0.s[0]
-; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: test_sabd_knownbits_vec4i32:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI31_0
+; CHECK-SD-NEXT: adrp x9, .LCPI31_1
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
+; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: movi v1.2d, #0x0000ff000000ff
+; CHECK-SD-NEXT: mov v0.s[1], v0.s[0]
+; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: test_sabd_knownbits_vec4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI31_2
+; CHECK-GI-NEXT: adrp x9, .LCPI31_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_2]
+; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI31_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI31_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI31_0]
+; CHECK-GI-NEXT: movi v3.2d, #0x0000ff000000ff
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: ret
%and1 = and <4 x i32> %lhs, <i32 255, i32 -1, i32 -1, i32 255>
%and2 = and <4 x i32> %rhs, <i32 255, i32 255, i32 -1, i32 -1>
%abd = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %and1, <4 x i32> %and2)
@@ -361,15 +418,27 @@ define <4 x i32> @test_sabd_knownbits_vec4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
}
define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI32_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI32_0
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI32_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI32_0]
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 255, i32 4085>
%3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %1, <4 x i32> %2)
@@ -378,10 +447,25 @@ define <4 x i32> @knownbits_sabd_and_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_or_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_or_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_or_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI33_1
+; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0]
+; CHECK-GI-NEXT: orr v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: orr v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: uabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = or <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
%3 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -392,18 +476,33 @@ define <4 x i32> @knownbits_sabd_and_or_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_xor_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI34_0
-; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
-; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: zip2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_xor_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI34_0
+; CHECK-SD-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
+; CHECK-SD-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: eor v0.16b, v0.16b, v3.16b
+; CHECK-SD-NEXT: eor v1.16b, v1.16b, v3.16b
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: zip2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_xor_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI34_1
+; CHECK-GI-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0]
+; CHECK-GI-NEXT: eor v0.16b, v0.16b, v3.16b
+; CHECK-GI-NEXT: eor v1.16b, v1.16b, v3.16b
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -1, i32 -1, i32 255, i32 4085>
%2 = xor <4 x i32> %1, <i32 65535, i32 65535, i32 65535, i32 65535>
%3 = and <4 x i32> %a1, <i32 -1, i32 -1, i32 255, i32 4085>
@@ -414,10 +513,24 @@ define <4 x i32> @knownbits_sabd_and_xor_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_shl_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_shl_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi v0.2d, #0000000000000000
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_shl_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI35_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0]
+; CHECK-GI-NEXT: shl v0.4s, v0.4s, #17
+; CHECK-GI-NEXT: shl v1.4s, v1.4s, #17
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
%2 = shl <4 x i32> %1, <i32 17, i32 17, i32 17, i32 17>
%3 = and <4 x i32> %a1, <i32 -65536, i32 -7, i32 -7, i32 -65536>
@@ -428,18 +541,32 @@ define <4 x i32> @knownbits_sabd_and_shl_mask(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @knownbits_sabd_and_mul_mask(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: knownbits_sabd_and_mul_mask:
-; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI36_0
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
-; CHECK-NEXT: and v3.16b, v0.16b, v2.16b
-; CHECK-NEXT: and v2.16b, v1.16b, v2.16b
-; CHECK-NEXT: mul v0.4s, v0.4s, v3.4s
-; CHECK-NEXT: mul v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: sabd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: mov v0.s[1], v0.s[0]
-; CHECK-NEXT: trn2 v0.4s, v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: knownbits_sabd_and_mul_mask:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: adrp x8, .LCPI36_0
+; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-SD-NEXT: and v3.16b, v0.16b, v2.16b
+; CHECK-SD-NEXT: and v2.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: mul v0.4s, v0.4s, v3.4s
+; CHECK-SD-NEXT: mul v1.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-SD-NEXT: mov v0.s[1], v0.s[0]
+; CHECK-SD-NEXT: trn2 v0.4s, v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: knownbits_sabd_and_mul_mask:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: adrp x8, .LCPI36_1
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_1]
+; CHECK-GI-NEXT: adrp x8, .LCPI36_0
+; CHECK-GI-NEXT: and v3.16b, v0.16b, v2.16b
+; CHECK-GI-NEXT: and v2.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: mul v0.4s, v0.4s, v3.4s
+; CHECK-GI-NEXT: mul v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0]
+; CHECK-GI-NEXT: sabd v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
+; CHECK-GI-NEXT: ret
%1 = and <4 x i32> %a0, <i32 -65536, i32 -7, i32 -7, i32 -65536>
%2 = mul <4 x i32> %a0, %1
%3 = and <4 x i32> %a1, <i32 -65536, i32 -7, i32 -7, i32 -65536>
diff --git a/llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir b/llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir
new file mode 100644
index 0000000..23ac67c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/bsp_implicit_ops.mir
@@ -0,0 +1,98 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-expand-pseudo -verify-machineinstrs %s -o - | FileCheck %s
+
+
+---
+name: BSL_COPY
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+
+ ; The destination $q2 is none of the three BSP sources, so the expected output first copies $q20 into $q2 (ORRv16i8) and then runs BSLv16i8 on that copy, keeping the implicit operands.
+ ; CHECK-LABEL: name: BSL_COPY
+ ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $q2 = ORRv16i8 killed renamable $q20, killed renamable $q20
+ ; CHECK-NEXT: renamable $q2 = BSLv16i8 killed renamable $q2, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0
+ ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1
+ ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2
+ ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3
+ ; CHECK-NEXT: RET undef $lr, implicit $q22
+ renamable $q2 = BSPv16i8 killed renamable $q20, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ $q22 = ORRv16i8 $q0, killed $q0
+ $q23 = ORRv16i8 $q1, killed $q1
+ $q24 = ORRv16i8 $q2, killed $q2
+ $q25 = ORRv16i8 $q3, killed $q3
+ RET_ReallyLR implicit $q22
+...
+---
+name: BSL
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; The destination $q2 is also the first BSP source, so the expected output is a single BSLv16i8 with the same operand order and the same implicit operands.
+ ; CHECK-LABEL: name: BSL
+ ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $q2 = BSLv16i8 killed renamable $q2, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0
+ ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1
+ ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2
+ ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3
+ ; CHECK-NEXT: RET undef $lr, implicit $q22
+ renamable $q2 = BSPv16i8 killed renamable $q2, renamable $q21, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ $q22 = ORRv16i8 $q0, killed $q0
+ $q23 = ORRv16i8 $q1, killed $q1
+ $q24 = ORRv16i8 $q2, killed $q2
+ $q25 = ORRv16i8 $q3, killed $q3
+ RET_ReallyLR implicit $q22
+...
+---
+name: BIF
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; The destination $q2 is also the second BSP source, so the expected output is BIFv16i8 taking $q2, then the third source ($q6), then the first source ($q20), with the implicit operands kept.
+ ; CHECK-LABEL: name: BIF
+ ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $q2 = BIFv16i8 renamable $q2, renamable $q6, killed renamable $q20, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0
+ ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1
+ ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2
+ ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3
+ ; CHECK-NEXT: RET undef $lr, implicit $q22
+ renamable $q2 = BSPv16i8 killed renamable $q20, renamable $q2, renamable $q6, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ $q22 = ORRv16i8 $q0, killed $q0
+ $q23 = ORRv16i8 $q1, killed $q1
+ $q24 = ORRv16i8 $q2, killed $q2
+ $q25 = ORRv16i8 $q3, killed $q3
+ RET_ReallyLR implicit $q22
+...
+---
+name: BIT
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; The destination $q2 is also the third BSP source, so the expected output is BITv16i8 taking $q2, then the second source ($q21), then the first source ($q20), with the implicit operands kept.
+ ; CHECK-LABEL: name: BIT
+ ; CHECK: liveins: $q20, $q21, $q22, $q23, $q6, $q1, $q7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $q2 = BITv16i8 renamable $q2, renamable $q21, killed renamable $q20, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ ; CHECK-NEXT: $q22 = ORRv16i8 $q0, killed $q0
+ ; CHECK-NEXT: $q23 = ORRv16i8 $q1, killed $q1
+ ; CHECK-NEXT: $q24 = ORRv16i8 $q2, killed $q2
+ ; CHECK-NEXT: $q25 = ORRv16i8 $q3, killed $q3
+ ; CHECK-NEXT: RET undef $lr, implicit $q22
+ renamable $q2 = BSPv16i8 killed renamable $q20, renamable $q21, renamable $q2, implicit killed $q21_q22_q23, implicit killed $q0_q1_q2_q3, implicit-def $q0_q1_q2_q3
+ $q22 = ORRv16i8 $q0, killed $q0
+ $q23 = ORRv16i8 $q1, killed $q1
+ $q24 = ORRv16i8 $q2, killed $q2
+ $q25 = ORRv16i8 $q3, killed $q3
+ RET_ReallyLR implicit $q22
+...
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
index 2b7fa08..e1ba0e9 100644
--- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll
+++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll
@@ -1631,7 +1631,6 @@ define i8 @combine_i8_sdiv_const100(i8 %x) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #41 // =0x29
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sxth w8, w8
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: asr w8, w8, #4
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
diff --git a/llvm/test/CodeGen/AArch64/freeze.ll b/llvm/test/CodeGen/AArch64/freeze.ll
index 0c56e1b..d428b6a 100644
--- a/llvm/test/CodeGen/AArch64/freeze.ll
+++ b/llvm/test/CodeGen/AArch64/freeze.ll
@@ -395,3 +395,37 @@ define i64 @freeze_array() {
%t1 = add i64 %v1, %v2
ret i64 %t1
}
+; A frozen llvm.aarch64.neon.uabd result added to %a: the SD check lines expect a single uaba, the GI check lines expect uabd followed by add.
+define <8 x i16> @freeze_abdu(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SD-LABEL: freeze_abdu:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: uaba v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_abdu:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: uabd v1.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
+ %d = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %a, <8 x i16> %b)
+ %f = freeze <8 x i16> %d
+ %r = add <8 x i16> %a, %f
+ ret <8 x i16> %r
+}
+; A frozen llvm.aarch64.neon.sabd result added to %a: the SD check lines expect a single saba, the GI check lines expect sabd followed by add.
+define <8 x i16> @freeze_abds(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-SD-LABEL: freeze_abds:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.8h, v0.8h, v1.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: freeze_abds:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sabd v1.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
+ %d = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %a, <8 x i16> %b)
+ %f = freeze <8 x i16> %d
+ %r = add <8 x i16> %a, %f
+ ret <8 x i16> %r
+}
diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
index f82d1ed..df4889b 100644
--- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
@@ -1,6 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-LE
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes CHECK,CHECK-LE
; RUN: llc -mtriple=aarch64_be-unknown-linux-gnu < %s | FileCheck %s --check-prefix CHECK-BE
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu -global-isel -global-isel-abort=2 2>&1 < %s | FileCheck %s --check-prefixes CHECK,CHECK-GI
+
+; CHECK-GI: warning: Instruction selection used fallback path for test_pmull_high_p8_128
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for test_pmull_high_p8_64
declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>)
declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>)
@@ -12,10 +16,10 @@ declare <8 x i16> @llvm.aarch64.neon.pmull.v8i16(<8 x i8>, <8 x i8>)
declare <2 x i64> @llvm.aarch64.neon.umull.v2i64(<2 x i32> %s1, <2 x i32> %s2)
define <4 x i32> @test_smull_high_s16_base(<8 x i16> %a, <8 x i16> %b) #0 {
-; CHECK-LE-LABEL: test_smull_high_s16_base:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_smull_high_s16_base:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_base:
; CHECK-BE: // %bb.0: // %entry
@@ -35,10 +39,10 @@ entry:
}
define <4 x i32> @test_smull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 {
-; CHECK-LE-LABEL: test_smull_high_s16_bitcasta1:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_smull_high_s16_bitcasta1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcasta1:
; CHECK-BE: // %bb.0: // %entry
@@ -59,10 +63,10 @@ entry:
}
define <4 x i32> @test_smull_high_s16_bitcastb1(<8 x i16> %a, <16 x i8> %bb) #0 {
-; CHECK-LE-LABEL: test_smull_high_s16_bitcastb1:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_smull_high_s16_bitcastb1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcastb1:
; CHECK-BE: // %bb.0: // %entry
@@ -83,10 +87,10 @@ entry:
}
define <4 x i32> @test_smull_high_s16_bitcasta2(<2 x i64> %a, <8 x i16> %b) #0 {
-; CHECK-LE-LABEL: test_smull_high_s16_bitcasta2:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_smull_high_s16_bitcasta2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcasta2:
; CHECK-BE: // %bb.0: // %entry
@@ -109,10 +113,10 @@ entry:
}
define <4 x i32> @test_smull_high_s16_bitcastb2(<8 x i16> %a, <16 x i8> %b) #0 {
-; CHECK-LE-LABEL: test_smull_high_s16_bitcastb2:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: smull2 v0.4s, v0.8h, v1.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_smull_high_s16_bitcastb2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_smull_high_s16_bitcastb2:
; CHECK-BE: // %bb.0: // %entry
@@ -157,6 +161,13 @@ define <4 x i32> @test_smull_high_s16_bitcasta1_wrongindex(<2 x i64> %aa, <8 x i
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_bitcasta1_wrongindex:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%a = bitcast <2 x i64> %aa to <8 x i16>
%s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
@@ -186,6 +197,13 @@ define <4 x i32> @test_smull_high_s16_bitcastb1_wrongindex(<8 x i16> %a, <16 x i
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_bitcastb1_wrongindex:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: ext v1.16b, v1.16b, v0.16b, #6
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%b = bitcast <16 x i8> %bb to <8 x i16>
%s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -215,6 +233,13 @@ define <4 x i32> @test_smull_high_s16_bitcasta2_wrongindex(<4 x i32> %a, <8 x i1
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_bitcasta2_wrongindex:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%s1a = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 1, i32 2>
%s1 = bitcast <2 x i32> %s1a to <4 x i16>
@@ -244,6 +269,13 @@ define <4 x i32> @test_smull_high_s16_bitcastb2_wrongindex(<8 x i16> %a, <16 x i
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_bitcastb2_wrongindex:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: ext v1.16b, v1.16b, v0.16b, #4
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%s2a = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
@@ -269,6 +301,12 @@ define <4 x i32> @test_smull_high_s16_splata1(<2 x i64> %aa, <8 x i16> %b) #0 {
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_splata1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: smull v0.4s, v1.4h, v0.h[3]
+; CHECK-GI-NEXT: ret
entry:
%a = bitcast <2 x i64> %aa to <8 x i16>
%s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
@@ -293,6 +331,12 @@ define <4 x i32> @test_smull_high_s16_splatb1(<8 x i16> %a, <16 x i8> %bb) #0 {
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_splatb1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.h[3]
+; CHECK-GI-NEXT: ret
entry:
%b = bitcast <16 x i8> %bb to <8 x i16>
%s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -322,6 +366,13 @@ define <4 x i32> @test_smull_high_s16_splata2(<4 x i32> %a, <8 x i16> %b) #0 {
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_splata2:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: dup v0.2s, v0.s[3]
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%s1a = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 3, i32 3>
%s1 = bitcast <2 x i32> %s1a to <4 x i16>
@@ -351,6 +402,13 @@ define <4 x i32> @test_smull_high_s16_splatb2(<8 x i16> %a, <16 x i8> %b) #0 {
; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_smull_high_s16_splatb2:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: dup v1.8b, v1.b[3]
+; CHECK-GI-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
entry:
%s1 = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
%s2a = shufflevector <16 x i8> %b, <16 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -362,10 +420,10 @@ entry:
define <4 x i32> @test_umull_high_s16_bitcasta1(<2 x i64> %aa, <8 x i16> %b) #0 {
-; CHECK-LE-LABEL: test_umull_high_s16_bitcasta1:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: umull2 v0.4s, v0.8h, v1.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_umull_high_s16_bitcasta1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: umull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_umull_high_s16_bitcasta1:
; CHECK-BE: // %bb.0: // %entry
@@ -386,10 +444,10 @@ entry:
}
define <8 x i16> @test_vabdl_high_u82(<16 x i8> %a, <8 x i16> %bb) {
-; CHECK-LE-LABEL: test_vabdl_high_u82:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: uabdl2 v0.8h, v0.16b, v1.16b
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_vabdl_high_u82:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: uabdl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_vabdl_high_u82:
; CHECK-BE: // %bb.0: // %entry
@@ -411,10 +469,10 @@ entry:
}
define <8 x i16> @test_vabdl_high_s82(<16 x i8> %a, <8 x i16> %bb) {
-; CHECK-LE-LABEL: test_vabdl_high_s82:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: sabdl2 v0.8h, v0.16b, v1.16b
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_vabdl_high_s82:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sabdl2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_vabdl_high_s82:
; CHECK-BE: // %bb.0: // %entry
@@ -436,10 +494,10 @@ entry:
}
define <4 x i32> @test_vqdmlal_high_s16_bitcast(<4 x i32> %a, <8 x i16> %b, <16 x i8> %cc) {
-; CHECK-LE-LABEL: test_vqdmlal_high_s16_bitcast:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_vqdmlal_high_s16_bitcast:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sqdmlal2 v0.4s, v1.8h, v2.8h
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_vqdmlal_high_s16_bitcast:
; CHECK-BE: // %bb.0: // %entry
@@ -463,12 +521,12 @@ entry:
}
define <8 x i16> @test_pmull_high_p8_128(i128 %aa, i128 %bb) {
-; CHECK-LE-LABEL: test_pmull_high_p8_128:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: fmov d0, x3
-; CHECK-LE-NEXT: fmov d1, x1
-; CHECK-LE-NEXT: pmull v0.8h, v1.8b, v0.8b
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_pmull_high_p8_128:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x3
+; CHECK-NEXT: fmov d1, x1
+; CHECK-NEXT: pmull v0.8h, v1.8b, v0.8b
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_pmull_high_p8_128:
; CHECK-BE: // %bb.0: // %entry
@@ -490,10 +548,10 @@ entry:
}
define <8 x i16> @test_pmull_high_p8_64(<2 x i64> %aa, <2 x i64> %bb) {
-; CHECK-LE-LABEL: test_pmull_high_p8_64:
-; CHECK-LE: // %bb.0: // %entry
-; CHECK-LE-NEXT: pmull2 v0.8h, v0.16b, v1.16b
-; CHECK-LE-NEXT: ret
+; CHECK-LABEL: test_pmull_high_p8_64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: pmull2 v0.8h, v0.16b, v1.16b
+; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_pmull_high_p8_64:
; CHECK-BE: // %bb.0: // %entry
@@ -532,6 +590,14 @@ define <8 x i16> @foov8i16(<16 x i8> %a1, <2 x i64> %b1) {
; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: foov8i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: shrn v1.4h, v1.4s, #5
+; CHECK-GI-NEXT: shrn v0.4h, v0.4s, #5
+; CHECK-GI-NEXT: fmov x8, d1
+; CHECK-GI-NEXT: mov v0.d[1], x8
+; CHECK-GI-NEXT: ret
%a0 = bitcast <16 x i8> %a1 to <4 x i32>
%b0 = bitcast <2 x i64> %b1 to <4 x i32>
%vshrn_low_shift = lshr <4 x i32> %a0, <i32 5, i32 5, i32 5, i32 5>
@@ -558,6 +624,12 @@ define <2 x i64> @hadd32_zext_asr(<16 x i8> %src1a) {
; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #1
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: hadd32_zext_asr:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov d0, v0.d[1]
+; CHECK-GI-NEXT: ushll v0.2d, v0.2s, #1
+; CHECK-GI-NEXT: ret
%src1 = bitcast <16 x i8> %src1a to <4 x i32>
%s1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
%zextsrc1 = zext <2 x i32> %s1 to <2 x i64>
@@ -580,6 +652,12 @@ define <2 x i64> @test_umull_high_s16_splata1(<2 x i64> %aa, <4 x i32> %b) #0 {
; CHECK-BE-NEXT: umull2 v0.2d, v1.4s, v0.s[1]
; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-BE-NEXT: ret
+;
+; CHECK-GI-LABEL: test_umull_high_s16_splata1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: mov d1, v1.d[1]
+; CHECK-GI-NEXT: umull v0.2d, v1.2s, v0.s[1]
+; CHECK-GI-NEXT: ret
entry:
%a = bitcast <2 x i64> %aa to <4 x i32>
%s1 = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 1, i32 1>
diff --git a/llvm/test/CodeGen/AArch64/neon-saba.ll b/llvm/test/CodeGen/AArch64/neon-saba.ll
index 78ccc89..19967bd 100644
--- a/llvm/test/CodeGen/AArch64/neon-saba.ll
+++ b/llvm/test/CodeGen/AArch64/neon-saba.ll
@@ -1,13 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -global-isel < %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; SABA from ADD(ABS(SUB NSW))
define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
-; CHECK-LABEL: saba_abs_4s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: saba v0.4s, v1.4s, v2.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_4s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.4s, v1.4s, v2.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_4s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub v1.4s, v1.4s, v2.4s
+; CHECK-GI-NEXT: abs v1.4s, v1.4s
+; CHECK-GI-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ret
%sub = sub nsw <4 x i32> %b, %c
%abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
%add = add <4 x i32> %a, %abs
@@ -15,10 +23,17 @@ define <4 x i32> @saba_abs_4s(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) #0 {
}
define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
-; CHECK-LABEL: saba_abs_2s:
-; CHECK: // %bb.0:
-; CHECK-NEXT: saba v0.2s, v1.2s, v2.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_2s:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.2s, v1.2s, v2.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_2s:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub v1.2s, v1.2s, v2.2s
+; CHECK-GI-NEXT: abs v1.2s, v1.2s
+; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: ret
%sub = sub nsw <2 x i32> %b, %c
%abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
%add = add <2 x i32> %a, %abs
@@ -26,10 +41,17 @@ define <2 x i32> @saba_abs_2s(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) #0 {
}
define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
-; CHECK-LABEL: saba_abs_8h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: saba v0.8h, v1.8h, v2.8h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_8h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.8h, v1.8h, v2.8h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_8h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub v1.8h, v1.8h, v2.8h
+; CHECK-GI-NEXT: abs v1.8h, v1.8h
+; CHECK-GI-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-GI-NEXT: ret
%sub = sub nsw <8 x i16> %b, %c
%abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
%add = add <8 x i16> %a, %abs
@@ -37,10 +59,17 @@ define <8 x i16> @saba_abs_8h(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) #0 {
}
define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
-; CHECK-LABEL: saba_abs_4h:
-; CHECK: // %bb.0:
-; CHECK-NEXT: saba v0.4h, v1.4h, v2.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_4h:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.4h, v1.4h, v2.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_4h:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub v1.4h, v1.4h, v2.4h
+; CHECK-GI-NEXT: abs v1.4h, v1.4h
+; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: ret
%sub = sub nsw <4 x i16> %b, %c
%abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
%add = add <4 x i16> %a, %abs
@@ -48,10 +77,17 @@ define <4 x i16> @saba_abs_4h(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) #0 {
}
define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
-; CHECK-LABEL: saba_abs_16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: saba v0.16b, v1.16b, v2.16b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_16b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.16b, v1.16b, v2.16b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_16b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub v1.16b, v1.16b, v2.16b
+; CHECK-GI-NEXT: abs v1.16b, v1.16b
+; CHECK-GI-NEXT: add v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: ret
%sub = sub nsw <16 x i8> %b, %c
%abs = call <16 x i8> @llvm.abs.v16i8(<16 x i8> %sub, i1 true)
%add = add <16 x i8> %a, %abs
@@ -59,10 +95,17 @@ define <16 x i8> @saba_abs_16b(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) #0 {
}
define <8 x i8> @saba_abs_8b(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) #0 {
-; CHECK-LABEL: saba_abs_8b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: saba v0.8b, v1.8b, v2.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: saba_abs_8b:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: saba v0.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: saba_abs_8b:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: sub v1.8b, v1.8b, v2.8b
+; CHECK-GI-NEXT: abs v1.8b, v1.8b
+; CHECK-GI-NEXT: add v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: ret
%sub = sub nsw <8 x i8> %b, %c
%abs = call <8 x i8> @llvm.abs.v8i8(<8 x i8> %sub, i1 true)
%add = add <8 x i8> %a, %abs
diff --git a/llvm/test/CodeGen/AArch64/rem-by-const.ll b/llvm/test/CodeGen/AArch64/rem-by-const.ll
index b124042..c57383a 100644
--- a/llvm/test/CodeGen/AArch64/rem-by-const.ll
+++ b/llvm/test/CodeGen/AArch64/rem-by-const.ll
@@ -52,7 +52,6 @@ define i8 @si8_100(i8 %a, i8 %b) {
; CHECK-GI-NEXT: sxtb w8, w0
; CHECK-GI-NEXT: mov w9, #41 // =0x29
; CHECK-GI-NEXT: mul w8, w8, w9
-; CHECK-GI-NEXT: sxth w8, w8
; CHECK-GI-NEXT: sbfx w8, w8, #8, #8
; CHECK-GI-NEXT: asr w8, w8, #4
; CHECK-GI-NEXT: ubfx w9, w8, #7, #1
diff --git a/llvm/test/CodeGen/AArch64/stack-tagging.ll b/llvm/test/CodeGen/AArch64/stack-tagging.ll
index 8759fb1..5d73c7b 100644
--- a/llvm/test/CodeGen/AArch64/stack-tagging.ll
+++ b/llvm/test/CodeGen/AArch64/stack-tagging.ll
@@ -143,54 +143,4 @@ l:
; CHECK-NOT: @llvm.aarch64.irg.sp
; CHECK: ret void
-; If we can't trace one of the lifetime markers to a single alloca, fall back
-; to poisoning all allocas at the beginning of the function.
-; Each alloca must be poisoned only once.
-define void @UnrecognizedLifetime(i8 %v) sanitize_memtag {
-entry:
- %x = alloca i32, align 4
- %y = alloca i32, align 4
- %z = alloca i32, align 4
- %tobool = icmp eq i8 %v, 0
- %xy = select i1 %tobool, ptr %x, ptr %y
- %cxcy = select i1 %tobool, ptr %x, ptr %y
- br label %another_bb
-
-another_bb:
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %z)
- store i32 7, ptr %z
- call void @noUse32(ptr %z)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %z)
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %z)
- store i32 7, ptr %z
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %z)
- call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cxcy)
- store i32 8, ptr %xy
- call void @noUse32(ptr %x)
- call void @noUse32(ptr %y)
- call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cxcy)
- ret void
-}
-
-; CHECK-LABEL: define void @UnrecognizedLifetime(
-; CHECK: call ptr @llvm.aarch64.irg.sp(i64 0)
-; CHECK: alloca { i32, [12 x i8] }, align 16
-; CHECK: call ptr @llvm.aarch64.tagp
-; CHECK: call void @llvm.aarch64.settag(
-; CHECK: alloca { i32, [12 x i8] }, align 16
-; CHECK: call ptr @llvm.aarch64.tagp
-; CHECK: call void @llvm.aarch64.settag(
-; CHECK: alloca { i32, [12 x i8] }, align 16
-; CHECK: call ptr @llvm.aarch64.tagp
-; CHECK: call void @llvm.aarch64.settag(
-; CHECK: store i32
-; CHECK: call void @noUse32(ptr
-; CHECK: store i32
-; CHECK: store i32
-; CHECK: call void @noUse32(ptr
-; CHECK: call void @llvm.aarch64.settag(
-; CHECK: call void @llvm.aarch64.settag(
-; CHECK: call void @llvm.aarch64.settag(
-; CHECK: ret void
-
!0 = !{}
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
index 4153f0b..9698f1a 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll
@@ -231,3 +231,27 @@ define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
%aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
ret <vscale x 8 x i64> %aext
}
+
+; Ensure we don't try to promote a predicate load to a sign-extended load (expect a predicate ldr plus a predicated mov of #-1).
+define <vscale x 16 x i8> @sload_16i1_16i8(ptr %addr) {
+; CHECK-LABEL: sload_16i1_16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr p0, [x0]
+; CHECK-NEXT: mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ret
+ %load = load <vscale x 16 x i1>, ptr %addr
+ %sext = sext <vscale x 16 x i1> %load to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %sext
+}
+
+; Ensure we don't try to promote a predicate load to a zero-extended load (expect a predicate ldr plus a predicated mov of #1).
+define <vscale x 16 x i8> @zload_16i1_16i8(ptr %addr) {
+; CHECK-LABEL: zload_16i1_16i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr p0, [x0]
+; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT: ret
+ %load = load <vscale x 16 x i1>, ptr %addr
+ %zext = zext <vscale x 16 x i1> %load to <vscale x 16 x i8>
+ ret <vscale x 16 x i8> %zext
+}
diff --git a/llvm/test/CodeGen/AArch64/wineh-reuse-catch-alloca.ll b/llvm/test/CodeGen/AArch64/wineh-reuse-catch-alloca.ll
deleted file mode 100644
index 18b8aab..0000000
--- a/llvm/test/CodeGen/AArch64/wineh-reuse-catch-alloca.ll
+++ /dev/null
@@ -1,100 +0,0 @@
-; RUN: llc %s --mtriple=aarch64-pc-windows-msvc -o - | FileCheck %s
-
-; Tests the fixed object layouts when two catchpads re-use the same stack
-; allocation for their catch objects.
-
-; Generated from this C++ code, with modifications to the IR (see comments in
-; IR):
-; https://godbolt.org/z/9qv5Yn68j
-; > clang --target=aarch64-pc-windows-msvc test.cpp
-; ```
-; extern "C" void boom();
-; extern "C" int calls_boom()
-; {
-; try { boom(); }
-; catch (int& i) { return i; }
-; catch (long& l) { return l; }
-; return 0;
-; }
-; ```
-
-; Only need 48 bytes on the stack, not 64.
-; CHECK-LABEL: calls_boom:
-; CHECK: sub sp, sp, #48
-; CHECK: .seh_stackalloc 48
-
-; Both the catch blocks load from the same address.
-; CHECK-LABEL: "?catch$3@?0?calls_boom@4HA":
-; CHECK: ldr x8, [x29, #24]
-; CHECK-LABEL: "?catch$4@?0?calls_boom@4HA":
-; CHECK: ldr x8, [x29, #24]
-
-; There's enough space for the UnwindHelp to be at -16 instead of -32
-; CHECK-LABEL: $cppxdata$calls_boom:
-; CHECK: .word -16 // UnwindHelp
-
-; Both catches have the same object offset.
-; CHECK-LABEL: $handlerMap$0$calls_boom:
-; CHECK: .word -8 // CatchObjOffset
-; CHECK-NEXT: .word "?catch$3@?0?calls_boom@4HA"@IMGREL // Handler
-; CHECK: .word -8 // CatchObjOffset
-; CHECK-NEXT: .word "?catch$4@?0?calls_boom@4HA"@IMGREL // Handler
-
-%rtti.TypeDescriptor2 = type { ptr, ptr, [3 x i8] }
-
-$"??_R0H@8" = comdat any
-
-$"??_R0J@8" = comdat any
-
-@"??_7type_info@@6B@" = external constant ptr
-@"??_R0H@8" = linkonce_odr global %rtti.TypeDescriptor2 { ptr @"??_7type_info@@6B@", ptr null, [3 x i8] c".H\00" }, comdat
-@"??_R0J@8" = linkonce_odr global %rtti.TypeDescriptor2 { ptr @"??_7type_info@@6B@", ptr null, [3 x i8] c".J\00" }, comdat
-
-define dso_local i32 @calls_boom() personality ptr @__CxxFrameHandler3 {
-entry:
- %retval = alloca i32, align 4
-; MODIFICATION: Remove unused alloca
-; %l = alloca ptr, align 8
- %i = alloca ptr, align 8
- invoke void @boom()
- to label %invoke.cont unwind label %catch.dispatch
-
-catch.dispatch:
- %0 = catchswitch within none [label %catch1, label %catch] unwind to caller
-
-catch1:
- %1 = catchpad within %0 [ptr @"??_R0H@8", i32 8, ptr %i]
- %2 = load ptr, ptr %i, align 8
- %3 = load i32, ptr %2, align 4
- store i32 %3, ptr %retval, align 4
- catchret from %1 to label %catchret.dest2
-
-catch:
-; MODIFICATION: Use %i instead of %l
- %4 = catchpad within %0 [ptr @"??_R0J@8", i32 8, ptr %i]
- %5 = load ptr, ptr %i, align 8
- %6 = load i32, ptr %5, align 4
- store i32 %6, ptr %retval, align 4
- catchret from %4 to label %catchret.dest
-
-invoke.cont:
- br label %try.cont
-
-catchret.dest:
- br label %return
-
-catchret.dest2:
- br label %return
-
-try.cont:
- store i32 0, ptr %retval, align 4
- br label %return
-
-return:
- %7 = load i32, ptr %retval, align 4
- ret i32 %7
-}
-
-declare dso_local void @boom() #1
-
-declare dso_local i32 @__CxxFrameHandler3(...)