about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Vladislav Dzhidzhoev <vdzhidzhoev@accesssoftek.com> 2023-08-14 11:16:04 +0200
committer Vladislav Dzhidzhoev <vdzhidzhoev@accesssoftek.com> 2023-09-20 18:22:54 +0200
commit fb8f59156f0f208f6192ed808fc223eda6c0e7ec (patch)
tree 9d43d4630eaef600c5b4aef78a5fe3e96177bd09
parent de018f5ca4b2598f6bbf7a27a1fb51935792c276 (diff)
download llvm-fb8f59156f0f208f6192ed808fc223eda6c0e7ec.zip
llvm-fb8f59156f0f208f6192ed808fc223eda6c0e7ec.tar.gz
llvm-fb8f59156f0f208f6192ed808fc223eda6c0e7ec.tar.bz2
[AArch64][GlobalISel] Adopt dup(load) -> LD1R patterns from SelectionDAG
Follow-up of #65630.
-rw-r--r-- llvm/lib/Target/AArch64/AArch64InstrGISel.td 17
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll 20
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-ld1.ll 2
-rw-r--r-- llvm/test/CodeGen/AArch64/arm64-st1.ll 2
4 files changed, 31 insertions, 10 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 0f3ef23..b47605c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -496,3 +496,20 @@ let AddedComplexity = 19 in {
defm : VecROStoreLane64_0Pat<ro16, store, v4i16, i16, hsub, STRHroW, STRHroX>;
defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>;
}
+
+def : Pat<(v8i8 (AArch64dup (i8 (load GPR64sp:$Rn)))), // dup(load) -> LD1R; address is a bare base register, matching the single $Rn operand of the output
+ (LD1Rv8b GPR64sp:$Rn)>;
+def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))),
+ (LD1Rv16b GPR64sp:$Rn)>;
+def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
+ (LD1Rv4h GPR64sp:$Rn)>;
+def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))),
+ (LD1Rv8h GPR64sp:$Rn)>;
+def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv2s GPR64sp:$Rn)>;
+def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
+ (LD1Rv4s GPR64sp:$Rn)>;
+def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv2d GPR64sp:$Rn)>;
+def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
+ (LD1Rv1d GPR64sp:$Rn)>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
index 6657b19..6277162 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll
@@ -13794,8 +13794,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], #1
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, #1
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13828,8 +13829,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
;
; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], x2
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, x2
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13862,8 +13864,9 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], #1
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, #1
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
@@ -13888,8 +13891,9 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) {
;
; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1r:
; CHECK-GISEL: ; %bb.0:
-; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], x2
-; CHECK-GISEL-NEXT: str x0, [x1]
+; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0]
+; CHECK-GISEL-NEXT: add x8, x0, x2
+; CHECK-GISEL-NEXT: str x8, [x1]
; CHECK-GISEL-NEXT: ret
%tmp1 = load i8, ptr %bar
%tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
index 96468b2..5b5ced1 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI
%struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> }
%struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> }
diff --git a/llvm/test/CodeGen/AArch64/arm64-st1.ll b/llvm/test/CodeGen/AArch64/arm64-st1.ll
index 121ca69..6f87c66 100644
--- a/llvm/test/CodeGen/AArch64/arm64-st1.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-st1.ll
@@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check.
; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s