about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-02-01 22:51:11 +0000
committer: Kyrylo Tkachov <kyrylo.tkachov@arm.com> 2021-02-02 12:14:39 +0000
commit: e8062ad468062f40064d2a648f02f993fe0070ba (patch)
tree: 906887adfe634725f14bf56c88bf1034acc308d5
parent: 5cebc818213c1281d54f3d1d1676674f995241fa (diff)
download: gcc-e8062ad468062f40064d2a648f02f993fe0070ba.zip
          gcc-e8062ad468062f40064d2a648f02f993fe0070ba.tar.gz
          gcc-e8062ad468062f40064d2a648f02f993fe0070ba.tar.bz2
aarch64: Add and use FLAG_LOAD in builtins
We already have a STORE flag that we use for builtins. This patch introduces a
LOAD set that uses AUTO_FP and FLAG_READ_MEMORY. This allows for more
aggressive optimisation of the load intrinsics.

Turns out we have a great many testcases that do:

float16x4x2_t
f_vld2_lane_f16 (float16_t * p, float16x4x2_t v)
{
  float16x4x2_t res;
  /* { dg-error "lane 4 out of range 0 - 3" "" { target *-*-* } 0 } */
  res = vld2_lane_f16 (p, v, 4);
  /* { dg-error "lane -1 out of range 0 - 3" "" { target *-*-* } 0 } */
  res = vld2_lane_f16 (p, v, -1);
  return res;
}

but since the first res is unused it now gets eliminated early on before we
get to give an error message. Ideally we'd like to warn for both. This patch
takes the conservative approach and doesn't convert the load-lane builtins to
LOAD; that's something we can improve later.

gcc/ChangeLog:

	* config/aarch64/aarch64-builtins.c (FLAG_LOAD): Define.
	* config/aarch64/aarch64-simd-builtins.def (ld1x2, ld2, ld3, ld4,
	ld2r, ld3r, ld4r, ld1, ld1x3, ld1x4): Use LOAD flags.
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c       |  1
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 28
2 files changed, 15 insertions, 14 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index ffff059..25ab866 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -133,6 +133,7 @@ const unsigned int FLAG_FP = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS;
const unsigned int FLAG_ALL = FLAG_READ_FPCR | FLAG_RAISE_FP_EXCEPTIONS
| FLAG_READ_MEMORY | FLAG_PREFETCH_MEMORY | FLAG_WRITE_MEMORY;
const unsigned int FLAG_STORE = FLAG_WRITE_MEMORY | FLAG_AUTO_FP;
+const unsigned int FLAG_LOAD = FLAG_READ_MEMORY | FLAG_AUTO_FP;
typedef struct
{
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 45573c2..61731be 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -91,21 +91,21 @@
VAR1 (SETREGP, set_qregci, 0, AUTO_FP, v2di)
VAR1 (SETREGP, set_qregxi, 0, AUTO_FP, v2di)
/* Implemented by aarch64_ld1x2<VQ:mode>. */
- BUILTIN_VQ (LOADSTRUCT, ld1x2, 0, ALL)
+ BUILTIN_VQ (LOADSTRUCT, ld1x2, 0, LOAD)
/* Implemented by aarch64_ld1x2<VDC:mode>. */
- BUILTIN_VDC (LOADSTRUCT, ld1x2, 0, ALL)
+ BUILTIN_VDC (LOADSTRUCT, ld1x2, 0, LOAD)
/* Implemented by aarch64_ld<VSTRUCT:nregs><VDC:mode>. */
- BUILTIN_VDC (LOADSTRUCT, ld2, 0, ALL)
- BUILTIN_VDC (LOADSTRUCT, ld3, 0, ALL)
- BUILTIN_VDC (LOADSTRUCT, ld4, 0, ALL)
+ BUILTIN_VDC (LOADSTRUCT, ld2, 0, LOAD)
+ BUILTIN_VDC (LOADSTRUCT, ld3, 0, LOAD)
+ BUILTIN_VDC (LOADSTRUCT, ld4, 0, LOAD)
/* Implemented by aarch64_ld<VSTRUCT:nregs><VQ:mode>. */
- BUILTIN_VQ (LOADSTRUCT, ld2, 0, ALL)
- BUILTIN_VQ (LOADSTRUCT, ld3, 0, ALL)
- BUILTIN_VQ (LOADSTRUCT, ld4, 0, ALL)
+ BUILTIN_VQ (LOADSTRUCT, ld2, 0, LOAD)
+ BUILTIN_VQ (LOADSTRUCT, ld3, 0, LOAD)
+ BUILTIN_VQ (LOADSTRUCT, ld4, 0, LOAD)
/* Implemented by aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>. */
- BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0, ALL)
- BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0, ALL)
- BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0, ALL)
+ BUILTIN_VALLDIF (LOADSTRUCT, ld2r, 0, LOAD)
+ BUILTIN_VALLDIF (LOADSTRUCT, ld3r, 0, LOAD)
+ BUILTIN_VALLDIF (LOADSTRUCT, ld4r, 0, LOAD)
/* Implemented by aarch64_ld<VSTRUCT:nregs>_lane<VQ:mode>. */
BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld2_lane, 0, ALL)
BUILTIN_VALLDIF (LOADSTRUCT_LANE, ld3_lane, 0, ALL)
@@ -586,7 +586,7 @@
BUILTIN_VDF (UNOP, float_truncate_lo_, 0, FP)
/* Implemented by aarch64_ld1<VALL_F16:mode>. */
- BUILTIN_VALL_F16 (LOAD1, ld1, 0, ALL)
+ BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
VAR1(STORE1P, ld1, 0, ALL, v2di)
/* Implemented by aarch64_st1<VALL_F16:mode>. */
@@ -594,10 +594,10 @@
VAR1 (STORE1P, st1, 0, STORE, v2di)
/* Implemented by aarch64_ld1x3<VALLDIF:mode>. */
- BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0, ALL)
+ BUILTIN_VALLDIF (LOADSTRUCT, ld1x3, 0, LOAD)
/* Implemented by aarch64_ld1x4<VALLDIF:mode>. */
- BUILTIN_VALLDIF (LOADSTRUCT, ld1x4, 0, ALL)
+ BUILTIN_VALLDIF (LOADSTRUCT, ld1x4, 0, LOAD)
/* Implemented by aarch64_st1x2<VALLDIF:mode>. */
BUILTIN_VALLDIF (STORESTRUCT, st1x2, 0, STORE)