author    Kyrylo Tkachov <ktkachov@gcc.gnu.org>  2018-03-20 17:13:16 +0000
committer Kyrylo Tkachov <ktkachov@gcc.gnu.org>  2018-03-20 17:13:16 +0000
commit    770ebe99fe48aca10f3553c4195deba1757d328a (patch)
tree      16f58159f03d1ab59f47648999b34f8d718cbae5 /gcc
parent    6f87580f7d0726d9683ca0f4a703a857f06f00d5 (diff)
This PR shows that we get the load/store_lanes logic wrong for arm big-endian.
It is tricky to get right. AArch64 does it by adding the appropriate lane-swapping
operations during expansion.
I'd like to do the same on arm eventually, but we'd need to port and validate the
VTBL-generating code and add it in all the right places. I'm not comfortable doing
that for GCC 8, but I am keen on getting the wrong code fixed.
As I say in the PR, vectorisation on armeb is already severely restricted (we disable many patterns on BYTES_BIG_ENDIAN)
and the load/store_lanes patterns really were not working properly at all, so disabling them is not
a radical approach.
The way to do that is to return false from the TARGET_ARRAY_MODE_SUPPORTED_P hook
for BYTES_BIG_ENDIAN.
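
For reference, here is a minimal sketch (illustrative only, not part of the patch;
the function and variable names are made up) of the interleaved access pattern that
load/store_lanes implements: NEON vld2/vst2 split {x, y} pairs from memory into one
vector per field and re-interleave them on the way back. The vectorizer emits the
same style of lane loads/stores for loops over two-field structs, and it is the
memory-order-to-lane-number mapping of these operations that the armeb expanders
currently get wrong.

/* Illustrative sketch of a 2-lane interleaved load/store using NEON
   intrinsics.  Requires an arm target with NEON enabled.  */
#include <arm_neon.h>

void
bump_y (int32_t *p, int n)
{
  for (int i = 0; i + 8 <= n; i += 8)
    {
      /* Deinterleave 8 ints: v.val[0] gets elements 0,2,4,6 (the x's),
	 v.val[1] gets elements 1,3,5,7 (the y's).  */
      int32x4x2_t v = vld2q_s32 (p + i);
      /* y += 1 in every lane.  */
      v.val[1] = vaddq_s32 (v.val[1], vdupq_n_s32 (1));
      /* Re-interleave back to memory: x,y,x,y,...  */
      vst2q_s32 (p + i, v);
    }
}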
Bootstrapped and tested on arm-none-linux-gnueabihf.
Also tested on armeb-none-eabi.
gcc/
	PR target/82518
	* config/arm/arm.c (arm_array_mode_supported_p): Return false for
	BYTES_BIG_ENDIAN.

gcc/testsuite/
	PR target/82518
	* lib/target-supports.exp (check_effective_target_vect_load_lanes):
	Disable for armeb targets.
	* gcc.target/arm/pr82518.c: New test.
From-SVN: r258687
Diffstat (limited to 'gcc')
 gcc/config/arm/arm.c                   |  5
 gcc/testsuite/gcc.target/arm/pr82518.c | 29
 gcc/testsuite/lib/target-supports.exp  |  3
 3 files changed, 35 insertions, 2 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 90d62e6..cb6ab81 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -27162,7 +27162,10 @@ static bool
 arm_array_mode_supported_p (machine_mode mode,
 			    unsigned HOST_WIDE_INT nelems)
 {
-  if (TARGET_NEON
+  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
+     for now, as the lane-swapping logic needs to be extended in the expanders.
+     See PR target/82518.  */
+  if (TARGET_NEON && !BYTES_BIG_ENDIAN
       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
       && (nelems >= 2 && nelems <= 4))
     return true;
diff --git a/gcc/testsuite/gcc.target/arm/pr82518.c b/gcc/testsuite/gcc.target/arm/pr82518.c
new file mode 100644
index 0000000..c3e45b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr82518.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-additional-options "-O3 -fno-inline -std=gnu99" } */
+/* { dg-add-options arm_neon } */
+
+typedef struct { int x, y; } X;
+
+void f4(X *p, int n)
+{
+  for (int i = 0; i < n; i++)
+    { p[i].x = i;
+      p[i].y = i + 1;
+    }
+}
+
+__attribute ((aligned (16))) X arr[100];
+
+int main(void)
+{
+  volatile int fail = 0;
+  f4 (arr, 100);
+  for (int i = 0; i < 100; i++)
+    if (arr[i].y != i+1 || arr[i].x != i)
+      fail = 1;
+  if (fail)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index a3ce690..55e7269 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6611,7 +6611,8 @@ proc check_effective_target_vect_load_lanes { } {
 	verbose "check_effective_target_vect_load_lanes: using cached result" 2
     } else {
 	set et_vect_load_lanes 0
-	if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+	# We don't support load_lanes correctly on big-endian arm.
+	if { ([istarget arm-*-*] && [check_effective_target_arm_neon_ok])
 	     || [istarget aarch64*-*-*] } {
 	    set et_vect_load_lanes 1
 	}
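
The new test can be run on its own from a standard GCC build tree with the usual
DejaGnu invocation, e.g. make check-gcc RUNTESTFLAGS="arm.exp=pr82518.c".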