about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- gcc/ChangeLog 7
-rw-r--r-- gcc/testsuite/ChangeLog 8
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c 60
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c 73
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c 45
-rw-r--r-- gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c 20
-rw-r--r-- gcc/tree-vect-analyze.c 12
-rw-r--r-- gcc/tree-vect-transform.c 34
8 files changed, 249 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 61f49fa..23721c4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2006-12-12 Ira Rosen <irar@il.ibm.com>
+
+ * tree-vect-analyze.c (vect_analyze_data_ref_access): Add another check
+ for stores with gaps.
+ * tree-vect-transform.c (vect_permute_store_chain): Create
+ interleave_high or interleave_low according to the endianness.
+
2006-12-12 Richard Guenther <rguenther@suse.de>
PR middle-end/30147
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8e8c438..724bd05c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2006-12-12 Ira Rosen <irar@il.ibm.com>
+
+ * gcc.dg/vect/vect-strided-store-u32-i2.c: New test.
+ * gcc.dg/vect/vect-strided-store-a-u8-i2.c: New test.
+ * gcc.dg/vect/vect-strided-store-u16-i4.c: New test.
+ * gcc.dg/vect/vect-strided-u8-i8-gap4.c: Add a case of stores with
+ gaps.
+
2006-12-12 Richard Guenther <rguenther@suse.de>
PR middle-end/30147
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
new file mode 100644
index 0000000..e7125b1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store-a-u8-i2.c
@@ -0,0 +1,60 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+
+typedef struct {
+ unsigned char a;
+ unsigned char b;
+} s;
+
+int
+main1 ()
+{
+ s arr[N];
+ s *ptr = arr;
+ s res[N];
+ int i;
+ unsigned char a[N], b[N];
+
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+ if (a[i] == 178)
+ abort();
+ }
+
+ for (i = 0; i < N; i++)
+ {
+ res[i].a = a[i] + 3;
+ res[i].b = a[i] + b[i];
+ ptr++;
+ }
+
+ /* Check results. */
+ for (i = 0; i < N; i++)
+ {
+ if (res[i].a != a[i] + 3
+ || res[i].b != a[i] + b[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+
+ main1 ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
new file mode 100644
index 0000000..629ab79
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u16-i4.c
@@ -0,0 +1,73 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 128
+
+typedef struct {
+ unsigned short a;
+ unsigned short b;
+ unsigned short c;
+ unsigned short d;
+} s;
+
+unsigned short a[N];
+unsigned short b[N];
+unsigned short c[N];
+
+int
+main1 (s *arr)
+{
+ int i;
+ s *ptr = arr;
+ s res[N];
+ unsigned short x, y, z, w;
+
+ for (i = 0; i < N; i++)
+ {
+ res[i].c = a[i];
+ res[i].a = b[i];
+ res[i].d = c[i];
+ res[i].b = a[i] + b [i];
+ ptr++;
+ }
+
+ /* Check results. */
+ for (i = 0; i < N; i++)
+ {
+ if (res[i].c != a[i]
+ || res[i].a != b[i]
+ || res[i].d != c[i]
+ || res[i].b != a[i] + b[i])
+ abort ();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ int i;
+ s arr[N];
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ a[i] = i;
+ b[i] = i * 2;
+ c[i] = 17;
+ if (a[i] == 178)
+ abort();
+ }
+
+ main1 (arr);
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c
new file mode 100644
index 0000000..ec81098
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-store-u32-i2.c
@@ -0,0 +1,45 @@
+/* { dg-require-effective-target vect_float } */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include "tree-vect.h"
+
+#define N 16
+
+int
+main1 (void)
+{
+ int i;
+ int a[N*2];
+ int b[N] = {0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30};
+ int c[N] = {1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31};
+
+ /* Strided access pattern. */
+ for (i = 0; i < N/2; i++)
+ {
+ a[i*2] = b[i] + c[i];
+ a[i*2+1] = b[i] * c[i];
+ }
+
+ /* Check results. */
+ for (i = 0; i < N/2; i++)
+ {
+ if (a[i*2] != b[i] + c[i]
+ || a[i*2+1] != b[i] * c[i])
+ abort();
+ }
+
+ return 0;
+}
+
+int main (void)
+{
+ check_vect ();
+ return main1 ();
+}
+
+/* Needs interleaving support. */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_interleave } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" { xfail { vect_interleave } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
index c176b32..7569146 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-strided-u8-i8-gap4.c
@@ -49,8 +49,26 @@ main1 (s *arr)
|| res[i].e != arr[i].b + arr[i].e
|| res[i].h != arr[i].c
|| res[i].g != arr[i].b + arr[i].c)
- abort();
+ abort ();
}
+
+ ptr = arr;
+ /* Not vectorizable: gap in store. */
+ for (i = 0; i < N; i++)
+ {
+ res[i].a = ptr->b;
+ res[i].b = ptr->c;
+ ptr++;
+ }
+
+ /* Check results. */
+ for (i = 0; i < N; i++)
+ {
+ if (res[i].a != arr[i].b
+ || res[i].b != arr[i].c)
+ abort ();
+ }
+
}
diff --git a/gcc/tree-vect-analyze.c b/gcc/tree-vect-analyze.c
index 7506150..c8b2bf8 100644
--- a/gcc/tree-vect-analyze.c
+++ b/gcc/tree-vect-analyze.c
@@ -1804,7 +1804,8 @@ vect_analyze_data_ref_access (struct data_reference *dr)
/* COUNT is the number of accesses found, we multiply it by the size of
the type to get COUNT_IN_BYTES. */
count_in_bytes = type_size * count;
- /* Check the size of the interleaving is not greater than STEP. */
+
+ /* Check that the size of the interleaving is not greater than STEP. */
if (dr_step < count_in_bytes)
{
if (vect_print_dump_info (REPORT_DETAILS))
@@ -1815,6 +1816,15 @@ vect_analyze_data_ref_access (struct data_reference *dr)
return false;
}
+ /* Check that the size of the interleaving is equal to STEP for stores,
+ i.e., that there are no gaps. */
+ if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "interleaved store with gaps");
+ return false;
+ }
+
/* Check that STEP is a multiple of type size. */
if ((dr_step % type_size) != 0)
{
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index 1be7689..f83d92a 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -2592,23 +2592,41 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain,
vect1 = VEC_index (tree, dr_chain, j);
vect2 = VEC_index (tree, dr_chain, j+length/2);
- /* high = interleave_high (vect1, vect2); */
+ /* Create interleaving stmt:
+ in the case of big endian:
+ high = interleave_high (vect1, vect2)
+ and in the case of little endian:
+ high = interleave_low (vect1, vect2). */
perm_dest = create_tmp_var (vectype, "vect_inter_high");
add_referenced_var (perm_dest);
- perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
- build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1,
- vect2));
+ if (BYTES_BIG_ENDIAN)
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
+ vect1, vect2));
+ else
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
+ vect1, vect2));
high = make_ssa_name (perm_dest, perm_stmt);
GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
vect_finish_stmt_generation (stmt, perm_stmt, bsi);
VEC_replace (tree, *result_chain, 2*j, high);
- /* low = interleave_low (vect1, vect2); */
+ /* Create interleaving stmt:
+ in the case of big endian:
+ low = interleave_low (vect1, vect2)
+ and in the case of little endian:
+ low = interleave_high (vect1, vect2). */
perm_dest = create_tmp_var (vectype, "vect_inter_low");
add_referenced_var (perm_dest);
- perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
- build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1,
- vect2));
+ if (BYTES_BIG_ENDIAN)
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
+ vect1, vect2));
+ else
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
+ vect1, vect2));
low = make_ssa_name (perm_dest, perm_stmt);
GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
vect_finish_stmt_generation (stmt, perm_stmt, bsi);