aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Biener <rguenther@suse.de> 2013-05-10 07:52:25 +0000
committer: Richard Biener <rguenth@gcc.gnu.org> 2013-05-10 07:52:25 +0000
commit: afb119beca72d64c166127445ca6883358e24174 (patch)
tree: b74da88625d1181dbe2a76c0891227920028cbd0
parent: 01ae486155f3c4fb45a52bb4e95527d072b84966 (diff)
download: gcc-afb119beca72d64c166127445ca6883358e24174.zip
gcc-afb119beca72d64c166127445ca6883358e24174.tar.gz
gcc-afb119beca72d64c166127445ca6883358e24174.tar.bz2
tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not disable peeling when we version for aliasing.
2013-05-10 Richard Biener <rguenther@suse.de>

* tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
  disable peeling when we version for aliasing.
  (vector_alignment_reachable_p): Honor explicit user alignment.
  (vect_supportable_dr_alignment): Likewise.
* tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use
  STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it.
* tree-vect-loop.c (vect_transform_loop): First apply versioning,
  then peeling to arrange for the cost-model check to come first.

* gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined.
* gcc.target/i386/l_fma_double_1.c: Adjust.
* gcc.target/i386/l_fma_double_2.c: Likewise.
* gcc.target/i386/l_fma_double_3.c: Likewise.
* gcc.target/i386/l_fma_double_4.c: Likewise.
* gcc.target/i386/l_fma_double_5.c: Likewise.
* gcc.target/i386/l_fma_double_6.c: Likewise.
* gcc.target/i386/l_fma_float_1.c: Likewise.
* gcc.target/i386/l_fma_float_2.c: Likewise.
* gcc.target/i386/l_fma_float_3.c: Likewise.
* gcc.target/i386/l_fma_float_4.c: Likewise.
* gcc.target/i386/l_fma_float_5.c: Likewise.
* gcc.target/i386/l_fma_float_6.c: Likewise.

From-SVN: r198767
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/testsuite/ChangeLog 16
-rw-r--r-- gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c 21
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_double_1.c 23
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_double_2.c 11
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_double_3.c 23
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_double_4.c 11
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_double_5.c 11
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_double_6.c 11
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_float_1.c 20
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_float_2.c 8
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_float_3.c 20
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_float_4.c 8
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_float_5.c 8
-rw-r--r-- gcc/testsuite/gcc.target/i386/l_fma_float_6.c 8
-rw-r--r-- gcc/tree-vect-data-refs.c 29
-rw-r--r-- gcc/tree-vect-loop-manip.c 27
-rw-r--r-- gcc/tree-vect-loop.c 17
18 files changed, 141 insertions, 142 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b6101ba..c7d6db9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2013-05-10 Richard Biener <rguenther@suse.de>
+
+ * tree-vect-data-refs.c (vect_enhance_data_refs_alignment): Do not
+ disable peeling when we version for aliasing.
+ (vector_alignment_reachable_p): Honor explicit user alignment.
+ (vect_supportable_dr_alignment): Likewise.
+ * tree-vect-loop-manip.c (vect_can_advance_ivs_p): Use
+ STMT_VINFO_LOOP_PHI_EVOLUTION_PART instead of recomputing it.
+ * tree-vect-loop.c (vect_transform_loop): First apply versioning,
+ then peeling to arrange for the cost-model check to come first.
+
2013-05-10 Alan Modra <amodra@gmail.com>
* configure.ac (HAVE_AS_TLS): Swap powerpc64 and powerpc cases.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3ab939a..7656de9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,19 @@
+2013-05-10 Richard Biener <rguenther@suse.de>
+
+ * gcc.target/i386/avx256-unaligned-load-2.c: Make well-defined.
+ * gcc.target/i386/l_fma_double_1.c: Adjust.
+ * gcc.target/i386/l_fma_double_2.c: Likewise.
+ * gcc.target/i386/l_fma_double_3.c: Likewise.
+ * gcc.target/i386/l_fma_double_4.c: Likewise.
+ * gcc.target/i386/l_fma_double_5.c: Likewise.
+ * gcc.target/i386/l_fma_double_6.c: Likewise.
+ * gcc.target/i386/l_fma_float_1.c: Likewise.
+ * gcc.target/i386/l_fma_float_2.c: Likewise.
+ * gcc.target/i386/l_fma_float_3.c: Likewise.
+ * gcc.target/i386/l_fma_float_4.c: Likewise.
+ * gcc.target/i386/l_fma_float_5.c: Likewise.
+ * gcc.target/i386/l_fma_float_6.c: Likewise.
+
2013-05-08 Paolo Carlini <paolo.carlini@oracle.com>
PR c++/51226
diff --git a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
index 2947d9e..e3ec854 100644
--- a/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx256-unaligned-load-2.c
@@ -1,26 +1,13 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -dp -mavx -mavx256-split-unaligned-load" } */
-#define N 1024
-
-char **ep;
-char **fp;
-
void
-avx_test (void)
+avx_test (char **cp, char **ep)
{
int i;
- char **ap;
- char **bp;
- char **cp;
-
- ap = ep;
- bp = fp;
- for (i = 128; i >= 0; i--)
- {
- *ap++ = *cp++;
- *bp++ = 0;
- }
+ char **ap = __builtin_assume_aligned (ep, 32);
+ for (i = 128; i > 0; i--)
+ *ap++ = *cp++;
}
/* { dg-final { scan-assembler-not "avx_loaddqu256" } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
index 2706593..b3ffcf2 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c
@@ -4,23 +4,24 @@
/* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
#include "l_fma_1.h"
/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
index e8933e2..713b24b 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c
@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
#include "l_fma_2.h"
@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
index 00c7567..cbc6ef8 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c
@@ -4,23 +4,24 @@
/* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
#include "l_fma_3.h"
/* { dg-final { scan-assembler-times "vfmadd132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231pd" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub213pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231pd" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfmadd213sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfmsub213sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmadd213sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 16 } } */
-/* { dg-final { scan-assembler-times "vfnmsub213sd" 16 } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfmsub213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 28 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213sd" 28 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
index 09970bd..d571aca 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c
@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
#include "l_fma_4.h"
@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
index 2a1428e..56d8636 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c
@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
#include "l_fma_5.h"
@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
index 092032a..f22763d 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c
@@ -4,7 +4,8 @@
/* Test that the compiler properly optimizes floating point multiply
and add instructions into FMA3 instructions. */
-#define TYPE double
+typedef double adouble __attribute__((aligned(sizeof (double))));
+#define TYPE adouble
#include "l_fma_6.h"
@@ -12,7 +13,7 @@
/* { dg-final { scan-assembler-times "vfmsub132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132pd" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132pd" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 56 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 56 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
index 4bcd81d..b2f58ac 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c
@@ -9,18 +9,18 @@
#include "l_fma_1.h"
/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
index 34b7fcb..6377585 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
index 6ff2c6e..878babb 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c
@@ -9,18 +9,18 @@
#include "l_fma_3.h"
/* { dg-final { scan-assembler-times "vfmadd132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd231ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmadd213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfmsub132ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmsub231ps" 4 } } */
+/* { dg-final { scan-assembler-times "vfmsub213ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmadd231ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 4 } } */
/* { dg-final { scan-assembler-times "vfnmsub231ps" 4 } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfmadd213ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfmsub213ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmadd213ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 32 } } */
-/* { dg-final { scan-assembler-times "vfnmsub213ss" 32 } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmadd213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfmsub213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmadd213ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 60 } } */
+/* { dg-final { scan-assembler-times "vfnmsub213ss" 60 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
index 39548bf..bacb01e 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
index 83d7951..a32fc41 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
index 1eefc81..a7a74fb 100644
--- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
+++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c
@@ -12,7 +12,7 @@
/* { dg-final { scan-assembler-times "vfmsub132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmadd132ps" 8 } } */
/* { dg-final { scan-assembler-times "vfnmsub132ps" 8 } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 64 } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 64 } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 120 } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 120 } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index c1b5826..bf0b510 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1024,7 +1024,8 @@ vector_alignment_reachable_p (struct data_reference *dr)
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Unknown misalignment, is_packed = %d",is_packed);
- if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
+ if ((TYPE_USER_ALIGN (type) && !is_packed)
+ || targetm.vectorize.vector_alignment_reachable (type, is_packed))
return true;
else
return false;
@@ -1323,7 +1324,6 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
bool stat;
gimple stmt;
stmt_vec_info stmt_info;
- int vect_versioning_for_alias_required;
unsigned int npeel = 0;
bool all_misalignments_unknown = true;
unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
@@ -1510,15 +1510,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
}
}
- vect_versioning_for_alias_required
- = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
-
- /* Temporarily, if versioning for alias is required, we disable peeling
- until we support peeling and versioning. Often peeling for alignment
- will require peeling for loop-bound, which in turn requires that we
- know how to adjust the loop ivs after the loop. */
- if (vect_versioning_for_alias_required
- || !vect_can_advance_ivs_p (loop_vinfo)
+ /* Check if we can possibly peel the loop. */
+ if (!vect_can_advance_ivs_p (loop_vinfo)
|| !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
do_peeling = false;
@@ -4722,9 +4715,10 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (!known_alignment_for_access_p (dr))
is_packed = not_size_aligned (DR_REF (dr));
- if (targetm.vectorize.
- support_vector_misalignment (mode, type,
- DR_MISALIGNMENT (dr), is_packed))
+ if ((TYPE_USER_ALIGN (type) && !is_packed)
+ || targetm.vectorize.
+ support_vector_misalignment (mode, type,
+ DR_MISALIGNMENT (dr), is_packed))
/* Can't software pipeline the loads, but can at least do them. */
return dr_unaligned_supported;
}
@@ -4736,9 +4730,10 @@ vect_supportable_dr_alignment (struct data_reference *dr,
if (!known_alignment_for_access_p (dr))
is_packed = not_size_aligned (DR_REF (dr));
- if (targetm.vectorize.
- support_vector_misalignment (mode, type,
- DR_MISALIGNMENT (dr), is_packed))
+ if ((TYPE_USER_ALIGN (type) && !is_packed)
+ || targetm.vectorize.
+ support_vector_misalignment (mode, type,
+ DR_MISALIGNMENT (dr), is_packed))
return dr_unaligned_supported;
}
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index bff5c22..82e724f 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -1555,7 +1555,6 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:");
for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
- tree access_fn = NULL;
tree evolution_part;
phi = gsi_stmt (gsi);
@@ -1588,31 +1587,13 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
/* Analyze the evolution function. */
- access_fn = instantiate_parameters
- (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
-
- if (!access_fn)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "No Access function.");
- return false;
- }
-
- STRIP_NOPS (access_fn);
- if (dump_enabled_p ())
- {
- dump_printf_loc (MSG_NOTE, vect_location,
- "Access function of PHI: ");
- dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
- }
-
- evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
-
+ evolution_part
+ = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
if (evolution_part == NULL_TREE)
{
if (dump_enabled_p ())
- dump_printf (MSG_MISSED_OPTIMIZATION, "No evolution.");
+ dump_printf (MSG_MISSED_OPTIMIZATION,
+ "No access function or evolution.");
return false;
}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 40eccea..0fb2dae 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -5499,19 +5499,22 @@ vect_transform_loop (loop_vec_info loop_vinfo)
check_profitability = true;
}
- /* Peel the loop if there are data refs with unknown alignment.
- Only one data ref with unknown store is allowed. */
+ /* Version the loop first, if required, so the profitability check
+ comes first. */
- if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
+ if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
+ || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
{
- vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
+ vect_loop_versioning (loop_vinfo, th, check_profitability);
check_profitability = false;
}
- if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
- || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
+ /* Peel the loop if there are data refs with unknown alignment.
+ Only one data ref with unknown store is allowed. */
+
+ if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
{
- vect_loop_versioning (loop_vinfo, th, check_profitability);
+ vect_do_peeling_for_alignment (loop_vinfo, th, check_profitability);
check_profitability = false;
}