aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Kugan Vivekanandarajah <kuganv@linaro.org> 2019-06-13 03:18:54 +0000
committer Kugan Vivekanandarajah <kugan@gcc.gnu.org> 2019-06-13 03:18:54 +0000
commit fa9863e7d34ecd011ae75083be2ae124e5831b64 (patch)
tree 77781de634a5011cf2a1275a44f8d8f3521f0e79 /gcc
parent dd550c996578ea7e94f3a59e57f24636186fbb95 (diff)
download gcc-fa9863e7d34ecd011ae75083be2ae124e5831b64.zip
gcc-fa9863e7d34ecd011ae75083be2ae124e5831b64.tar.gz
gcc-fa9863e7d34ecd011ae75083be2ae124e5831b64.tar.bz2
re PR target/88834 ([SVE] Poor addressing mode choices for LD2 and ST2)
gcc/ChangeLog:

2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

    PR target/88834
    * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
    IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
    (get_alias_ptr_type_for_ptr_address): Likewise.
    (add_iv_candidate_for_use): Add scaled index candidate if useful.
    * tree-ssa-address.c (preferred_mem_scale_factor): New.
    * config/aarch64/aarch64.c (aarch64_classify_address): Relax
    allow_reg_index_p.

gcc/testsuite/ChangeLog:

2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>

    PR target/88834
    * gcc.target/aarch64/pr88834.c: New test.
    * gcc.target/aarch64/sve/struct_vect_1.c: Adjust.
    * gcc.target/aarch64/sve/struct_vect_14.c: Likewise.
    * gcc.target/aarch64/sve/struct_vect_15.c: Likewise.
    * gcc.target/aarch64/sve/struct_vect_16.c: Likewise.
    * gcc.target/aarch64/sve/struct_vect_17.c: Likewise.
    * gcc.target/aarch64/sve/struct_vect_7.c: Likewise.

From-SVN: r272232
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 11
-rw-r--r-- gcc/config/aarch64/aarch64.c 2
-rw-r--r-- gcc/testsuite/ChangeLog 11
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/pr88834.c 15
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c 8
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c 8
-rw-r--r-- gcc/tree-ssa-address.c 29
-rw-r--r-- gcc/tree-ssa-address.h 3
-rw-r--r-- gcc/tree-ssa-loop-ivopts.c 24
13 files changed, 118 insertions, 25 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d18df48..58fb6fc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,16 @@
2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
+ PR target/88834
+ * tree-ssa-loop-ivopts.c (get_mem_type_for_internal_fn): Handle
+ IFN_MASK_LOAD_LANES and IFN_MASK_STORE_LANES.
+ (get_alias_ptr_type_for_ptr_address): Likewise.
+ (add_iv_candidate_for_use): Add scaled index candidate if useful.
+ * tree-ssa-address.c (preferred_mem_scale_factor): New.
+ * config/aarch64/aarch64.c (aarch64_classify_address): Relax
+ allow_reg_index_p.
+
+2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
+
* config/aarch64/iterators.md (ADDSUB): Fix typo in comment.
2019-06-12 Dimitar Dimitrov <dimitar@dinux.eu>
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9a035dd..f8285ac 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6629,7 +6629,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
bool allow_reg_index_p = (!load_store_pair_p
&& (known_lt (GET_MODE_SIZE (mode), 16)
|| vec_flags == VEC_ADVSIMD
- || vec_flags == VEC_SVE_DATA));
+ || vec_flags & VEC_SVE_DATA));
/* For SVE, only accept [Rn], [Rn, Rm, LSL #shift] and
[Rn, #offset, MUL VL]. */
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 591d1f4..2f222a9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2019-06-13 Kugan Vivekanandarajah <kugan.vivekanandarajah@linaro.org>
+
+ PR target/88834
+ * gcc.target/aarch64/pr88834.c: New test.
+ * gcc.target/aarch64/sve/struct_vect_1.c: Adjust.
+ * gcc.target/aarch64/sve/struct_vect_14.c: Likewise.
+ * gcc.target/aarch64/sve/struct_vect_15.c: Likewise.
+ * gcc.target/aarch64/sve/struct_vect_16.c: Likewise.
+ * gcc.target/aarch64/sve/struct_vect_17.c: Likewise.
+ * gcc.target/aarch64/sve/struct_vect_7.c: Likewise.
+
2019-06-12 Marek Polacek <polacek@redhat.com>
PR c++/87410
diff --git a/gcc/testsuite/gcc.target/aarch64/pr88834.c b/gcc/testsuite/gcc.target/aarch64/pr88834.c
new file mode 100644
index 0000000..ea00967
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr88834.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-S -O3 -march=armv8.2-a+sve" } */
+/* PR target/88834: even-indexed elements of X get Y + Z, odd-indexed
+   elements get Y - Z.  The dg-final scans below expect two ld2w and one
+   st2w, each addressed as [base, index, lsl 2].  */
+void
+f (int *restrict x, int *restrict y, int *restrict z, int n)
+{
+ for (int i = 0; i < n; i += 2)
+ {
+ x[i] = y[i] + z[i];
+ x[i + 1] = y[i + 1] - z[i + 1];
+ }
+}
+
+/* { dg-final { scan-assembler-times {\tld2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tst2w\t{z[0-9]+.s - z[0-9]+.s}, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c
index 6e3c889..918a581 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_1.c
@@ -83,9 +83,9 @@ NAME(g4) (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
}
}
-/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c
index 45644b6..a16a79e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_14.c
@@ -43,12 +43,12 @@
#undef NAME
#undef TYPE
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c
index 814dbb3..bc00267 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_15.c
@@ -3,12 +3,12 @@
#include "struct_vect_14.c"
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c
index 6ecf89b..9e2a549 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_16.c
@@ -3,12 +3,12 @@
#include "struct_vect_14.c"
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c
index 571c6d0..e791e2e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_17.c
@@ -3,12 +3,12 @@
#include "struct_vect_14.c"
-/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
-/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld2h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld3h\t{z[0-9]+.h - z[0-9]+.h}, p[0-7]/z, \[x[0-9]+\]\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c
index b741901..3d3070e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/struct_vect_7.c
@@ -78,9 +78,9 @@ g4 (TYPE *__restrict a, TYPE *__restrict b, TYPE *__restrict c,
}
}
-/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tld3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7]/z, \[x[0-9]+, x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst2b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
/* { dg-final { scan-assembler {\tst3b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
-/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tst4b\t{z[0-9]+.b - z[0-9]+.b}, p[0-7], \[x[0-9]+, x[0-9]+\]\n} } } */
diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 1c17e93..cdd432a 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -1127,6 +1127,35 @@ maybe_fold_tmr (tree ref)
return new_ref;
}
+/* Return the preferred index scale factor for accessing memory of mode
+   MEM_MODE in the address space of pointer BASE.  Assume that we're
+   optimizing for speed if SPEED is true and for size otherwise.
+
+   The result is the unit size of MEM_MODE when an address of the form
+   "base + index * unit_size" is strictly cheaper than "base + index",
+   and 1 otherwise.  Callers treat 1 as "no scaling is useful".  */
+unsigned int
+preferred_mem_scale_factor (tree base, machine_mode mem_mode,
+                            bool speed)
+{
+  /* BLKmode has no element size to scale the index by, so there is no
+     scaled address form worth costing.  Report "no scaling" before
+     building any address.  */
+  if (mem_mode == BLKmode)
+    return 1;
+
+  struct mem_address parts = {};
+  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (base));
+  unsigned int fact = GET_MODE_UNIT_SIZE (mem_mode);
+
+  /* Addressing mode "base + index".  The operands are placeholders; only
+     the shape of the address matters for the cost query.  */
+  parts.index = integer_one_node;
+  parts.base = integer_one_node;
+  rtx addr = addr_for_mem_ref (&parts, as, false);
+  unsigned cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Addressing mode "base + index * fact".  */
+  parts.step = wide_int_to_tree (sizetype, fact);
+  addr = addr_for_mem_ref (&parts, as, false);
+  unsigned new_cost = address_cost (addr, mem_mode, as, speed);
+
+  /* Prefer the scaled index only when it is strictly cheaper than the
+     unscaled one.  */
+  if (new_cost < cost)
+    return fact;
+  return 1;
+}
+
/* Dump PARTS to FILE. */
extern void dump_mem_address (FILE *, struct mem_address *);
diff --git a/gcc/tree-ssa-address.h b/gcc/tree-ssa-address.h
index 6fa4eae..9812f36 100644
--- a/gcc/tree-ssa-address.h
+++ b/gcc/tree-ssa-address.h
@@ -39,4 +39,7 @@ tree create_mem_ref (gimple_stmt_iterator *, tree,
extern void copy_ref_info (tree, tree);
tree maybe_fold_tmr (tree);
+extern unsigned int preferred_mem_scale_factor (tree base,
+ machine_mode mem_mode,
+ bool speed);
#endif /* GCC_TREE_SSA_ADDRESS_H */
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 890f9b7..047d4a0 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -2381,11 +2381,13 @@ get_mem_type_for_internal_fn (gcall *call, tree *op_p)
switch (gimple_call_internal_fn (call))
{
case IFN_MASK_LOAD:
+ case IFN_MASK_LOAD_LANES:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_lhs (call));
return NULL_TREE;
case IFN_MASK_STORE:
+ case IFN_MASK_STORE_LANES:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_arg (call, 3));
return NULL_TREE;
@@ -3430,6 +3432,26 @@ add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
basetype = sizetype;
record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
+ /* Compare the cost of an address with an unscaled index with the cost of
+ an address with a scaled index and add candidate if useful. */
+ poly_int64 step;
+ if (use != NULL
+ && poly_int_tree_p (iv->step, &step)
+ && address_p (use->type))
+ {
+ poly_int64 new_step;
+ unsigned int fact = preferred_mem_scale_factor
+ (use->iv->base,
+ TYPE_MODE (use->mem_type),
+ optimize_loop_for_speed_p (data->current_loop));
+
+ if (fact != 1
+ && multiple_p (step, fact, &new_step))
+ add_candidate (data, size_int (0),
+ wide_int_to_tree (sizetype, new_step),
+ true, NULL);
+ }
+
/* Record common candidate with constant offset stripped in base.
Like the use itself, we also add candidate directly for it. */
base = strip_offset (iv->base, &offset);
@@ -7042,6 +7064,8 @@ get_alias_ptr_type_for_ptr_address (iv_use *use)
{
case IFN_MASK_LOAD:
case IFN_MASK_STORE:
+ case IFN_MASK_LOAD_LANES:
+ case IFN_MASK_STORE_LANES:
/* The second argument contains the correct alias type. */
gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
return TREE_TYPE (gimple_call_arg (call, 1));