author     Kyrylo Tkachov <ktkachov@gcc.gnu.org>  2018-03-20 17:13:16 +0000
committer  Kyrylo Tkachov <ktkachov@gcc.gnu.org>  2018-03-20 17:13:16 +0000
commit     770ebe99fe48aca10f3553c4195deba1757d328a (patch)
tree       16f58159f03d1ab59f47648999b34f8d718cbae5 /gcc
parent     6f87580f7d0726d9683ca0f4a703a857f06f00d5 (diff)
This PR shows that we get the load/store_lanes logic wrong for arm big-endian.
It is tricky to get right. AArch64 does it by adding the appropriate lane-swapping
operations during expansion. I'd like to do the same on arm eventually, but we'd need to
port and validate the VTBL-generating code and add it to all the right places, and I'm
not comfortable enough doing it for GCC 8, but I am keen on getting the wrong-code bug
fixed. As I say in the PR, vectorisation on armeb is already severely restricted (we
disable many patterns on BYTES_BIG_ENDIAN) and the load/store_lanes patterns really were
not working properly at all, so disabling them is not a radical approach. The way to do
that is to return false in ARRAY_MODE_SUPPORTED_P for BYTES_BIG_ENDIAN.

Bootstrapped and tested on arm-none-linux-gnueabihf. Also tested on armeb-none-eabi.

	PR target/82518
	* config/arm/arm.c (arm_array_mode_supported_p): Return false for
	BYTES_BIG_ENDIAN.

	* lib/target-supports.exp (check_effective_target_vect_load_lanes):
	Disable for armeb targets.

	* gcc.target/arm/pr82518.c: New test.

From-SVN: r258687
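Background note (not part of the patch): the store_lanes support being disabled here is
what lets the vectoriser use NEON VST2 to write interleaved pairs such as the x/y fields
in the test below. A minimal, hand-written sketch of the equivalent intrinsics code,
assuming a little-endian arm target with NEON; the function name is made up for
illustration:

#include <arm_neon.h>

/* Store four {x, y} pairs: VST2 interleaves the two Q registers so that
   memory ends up as x0, y0, x1, y1, ...  On big-endian arm the lane
   numbering the compiler uses does not match this in-memory order unless
   extra lane swaps are generated, which is the wrong-code issue avoided
   by disabling the patterns.  */
void
store_pairs (int32_t *p)
{
  int32_t x[4] = { 0, 1, 2, 3 };
  int32_t y[4] = { 1, 2, 3, 4 };
  int32x4x2_t v;

  v.val[0] = vld1q_s32 (x);
  v.val[1] = vld1q_s32 (y);
  vst2q_s32 (p, v);
}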
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/arm/arm.c                      5
-rw-r--r--  gcc/testsuite/gcc.target/arm/pr82518.c   29
-rw-r--r--  gcc/testsuite/lib/target-supports.exp     3
3 files changed, 35 insertions, 2 deletions
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 90d62e6..cb6ab81 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -27162,7 +27162,10 @@ static bool
 arm_array_mode_supported_p (machine_mode mode,
                             unsigned HOST_WIDE_INT nelems)
 {
-  if (TARGET_NEON
+  /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
+     for now, as the lane-swapping logic needs to be extended in the expanders.
+     See PR target/82518.  */
+  if (TARGET_NEON && !BYTES_BIG_ENDIAN
       && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
       && (nelems >= 2 && nelems <= 4))
     return true;
diff --git a/gcc/testsuite/gcc.target/arm/pr82518.c b/gcc/testsuite/gcc.target/arm/pr82518.c
new file mode 100644
index 0000000..c3e45b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr82518.c
@@ -0,0 +1,29 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-additional-options "-O3 -fno-inline -std=gnu99" } */
+/* { dg-add-options arm_neon } */
+
+typedef struct { int x, y; } X;
+
+void f4(X *p, int n)
+{
+  for (int i = 0; i < n; i++)
+    { p[i].x = i;
+      p[i].y = i + 1;
+    }
+}
+
+__attribute ((aligned (16))) X arr[100];
+
+int main(void)
+{
+  volatile int fail = 0;
+  f4 (arr, 100);
+  for (int i = 0; i < 100; i++)
+    if (arr[i].y != i+1 || arr[i].x != i)
+      fail = 1;
+  if (fail)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index a3ce690..55e7269 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -6611,7 +6611,8 @@ proc check_effective_target_vect_load_lanes { } {
     verbose "check_effective_target_vect_load_lanes: using cached result" 2
   } else {
     set et_vect_load_lanes 0
-    if { ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+    # We don't support load_lanes correctly on big-endian arm.
+    if { ([istarget arm-*-*] && [check_effective_target_arm_neon_ok])
          || [istarget aarch64*-*-*] } {
       set et_vect_load_lanes 1
     }
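
For reference, vectoriser tests that depend on these instructions gate themselves on the
vect_load_lanes effective-target keyword (via dg-require-effective-target or a target
selector on their scan directives), so after this change such checks are skipped on armeb
rather than FAILing. A hedged sketch of what such a test could look like; the file body
and function name below are illustrative, not taken from this patch:

/* { dg-do compile } */
/* { dg-require-effective-target vect_load_lanes } */
/* { dg-additional-options "-O3" } */

/* An interleaved access pattern of the kind load/store_lanes handles.  */
void
interleave (int *restrict a, int *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    {
      a[2 * i] = b[i];
      a[2 * i + 1] = b[i] + 1;
    }
}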