diff options
author | Vladislav Dzhidzhoev <vdzhidzhoev@accesssoftek.com> | 2023-08-14 11:16:04 +0200 |
---|---|---|
committer | Vladislav Dzhidzhoev <vdzhidzhoev@accesssoftek.com> | 2023-09-20 18:22:54 +0200 |
commit | fb8f59156f0f208f6192ed808fc223eda6c0e7ec (patch) | |
tree | 9d43d4630eaef600c5b4aef78a5fe3e96177bd09 | |
parent | de018f5ca4b2598f6bbf7a27a1fb51935792c276 (diff) | |
download | llvm-fb8f59156f0f208f6192ed808fc223eda6c0e7ec.zip llvm-fb8f59156f0f208f6192ed808fc223eda6c0e7ec.tar.gz llvm-fb8f59156f0f208f6192ed808fc223eda6c0e7ec.tar.bz2 |
[AArch64][GlobalISel] Adopt dup(load) -> LD1R patterns from SelectionDAG
Follow-up of #65630.
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrGISel.td | 17 |
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll | 20 |
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-ld1.ll | 2 |
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-st1.ll | 2 |
4 files changed, 31 insertions, 10 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td index 0f3ef23..b47605c 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td +++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td @@ -496,3 +496,20 @@ let AddedComplexity = 19 in { defm : VecROStoreLane64_0Pat<ro16, store, v4i16, i16, hsub, STRHroW, STRHroX>; defm : VecROStoreLane64_0Pat<ro32, store, v2i32, i32, ssub, STRSroW, STRSroX>; } + +def : Pat<(v8i8 (AArch64dup (i8 (load (am_indexed8 GPR64sp:$Rn))))), + (LD1Rv8b GPR64sp:$Rn)>; +def : Pat<(v16i8 (AArch64dup (i8 (load GPR64sp:$Rn)))), + (LD1Rv16b GPR64sp:$Rn)>; +def : Pat<(v4i16 (AArch64dup (i16 (load GPR64sp:$Rn)))), + (LD1Rv4h GPR64sp:$Rn)>; +def : Pat<(v8i16 (AArch64dup (i16 (load GPR64sp:$Rn)))), + (LD1Rv8h GPR64sp:$Rn)>; +def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv2s GPR64sp:$Rn)>; +def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))), + (LD1Rv4s GPR64sp:$Rn)>; +def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv2d GPR64sp:$Rn)>; +def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))), + (LD1Rv1d GPR64sp:$Rn)>; diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 6657b19..6277162 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -13794,8 +13794,9 @@ define <16 x i8> @test_v16i8_post_imm_ld1r(ptr %bar, ptr %ptr) { ; ; CHECK-GISEL-LABEL: test_v16i8_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], #1 -; CHECK-GISEL-NEXT: str x0, [x1] +; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, #1 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, 
i8 undef>, i8 %tmp1, i32 0 @@ -13828,8 +13829,9 @@ define <16 x i8> @test_v16i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { ; ; CHECK-GISEL-LABEL: test_v16i8_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0], x2 -; CHECK-GISEL-NEXT: str x0, [x1] +; CHECK-GISEL-NEXT: ld1r.16b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, x2 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 @@ -13862,8 +13864,9 @@ define <8 x i8> @test_v8i8_post_imm_ld1r(ptr %bar, ptr %ptr) { ; ; CHECK-GISEL-LABEL: test_v8i8_post_imm_ld1r: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], #1 -; CHECK-GISEL-NEXT: str x0, [x1] +; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, #1 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 @@ -13888,8 +13891,9 @@ define <8 x i8> @test_v8i8_post_reg_ld1r(ptr %bar, ptr %ptr, i64 %inc) { ; ; CHECK-GISEL-LABEL: test_v8i8_post_reg_ld1r: ; CHECK-GISEL: ; %bb.0: -; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0], x2 -; CHECK-GISEL-NEXT: str x0, [x1] +; CHECK-GISEL-NEXT: ld1r.8b { v0 }, [x0] +; CHECK-GISEL-NEXT: add x8, x0, x2 +; CHECK-GISEL-NEXT: str x8, [x1] ; CHECK-GISEL-NEXT: ret %tmp1 = load i8, ptr %bar %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0 diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index 96468b2..5b5ced1 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 3 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -global-isel=1 -global-isel-abort=2 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -global-isel=1 -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,CHECK-GI %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } diff --git a/llvm/test/CodeGen/AArch64/arm64-st1.ll b/llvm/test/CodeGen/AArch64/arm64-st1.ll index 121ca69..6f87c66 100644 --- a/llvm/test/CodeGen/AArch64/arm64-st1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-st1.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s -; RUN: llc < %s -global-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s ; The instruction latencies of Exynos-M3 trigger the transform we see under the Exynos check. ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s |