aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRichard Sandiford <richard.sandiford@linaro.org>2017-11-09 15:16:42 +0000
committerRichard Sandiford <rsandifo@gcc.gnu.org>2017-11-09 15:16:42 +0000
commit8b26c5492bf1d4c7acab18eff1e78a239af779a1 (patch)
tree2c159c59da94cd3e852865056a6b8f108912306a /gcc
parent83f15782dde23b068a806b3f7e0c253c49d148a2 (diff)
downloadgcc-8b26c5492bf1d4c7acab18eff1e78a239af779a1.zip
gcc-8b26c5492bf1d4c7acab18eff1e78a239af779a1.tar.gz
gcc-8b26c5492bf1d4c7acab18eff1e78a239af779a1.tar.bz2
Add vect_perm3_* target selectors
SLP load permutation fails if any individual permutation requires more than two vector inputs. For 128-bit vectors, it's possible to permute 3 contiguous loads of 32-bit and 8-bit elements, but not 16-bit elements or 64-bit elements. The results are reversed for 256-bit vectors, and so on for wider vectors. This patch adds a routine that tests whether a permute will require three vectors for a given vector count and element size, then adds vect_perm3_* target selectors for the cases that we currently use. 2017-11-09 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> gcc/ * doc/sourcebuild.texi (vect_perm_short, vect_perm_byte): Document previously undocumented selectors. (vect_perm3_byte, vect_perm3_short, vect_perm3_int): Document. gcc/testsuite/ * lib/target-supports.exp (vect_perm_supported): New proc. (check_effective_target_vect_perm3_int): Likewise. (check_effective_target_vect_perm3_short): Likewise. (check_effective_target_vect_perm3_byte): Likewise. * gcc.dg/vect/slp-perm-1.c: Expect SLP load permutation to succeed if vect_perm3_int. * gcc.dg/vect/slp-perm-5.c: Likewise. * gcc.dg/vect/slp-perm-6.c: Likewise. * gcc.dg/vect/slp-perm-7.c: Likewise. * gcc.dg/vect/slp-perm-8.c: Likewise vect_perm3_byte. * gcc.dg/vect/slp-perm-9.c: Likewise vect_perm3_short. Use vect_perm_short instead of vect_perm. Add a scan-tree-dump-not test for vect_perm3_short targets. Co-Authored-By: Alan Hayward <alan.hayward@arm.com> Co-Authored-By: David Sherwood <david.sherwood@arm.com> From-SVN: r254592
Diffstat (limited to 'gcc')
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/doc/sourcebuild.texi26
-rw-r--r--gcc/testsuite/ChangeLog18
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-1.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-5.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-6.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-7.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-8.c4
-rw-r--r--gcc/testsuite/gcc.dg/vect/slp-perm-9.c11
-rw-r--r--gcc/testsuite/lib/target-supports.exp88
10 files changed, 156 insertions, 15 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 79d156e..9bb6826 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,12 @@
2017-11-09 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
+ * doc/sourcebuild.texi (vect_perm_short, vect_perm_byte): Document
+ previously undocumented selectors.
+ (vect_perm3_byte, vect_perm3_short, vect_perm3_int): Document.
+
+2017-11-09 Richard Sandiford <richard.sandiford@linaro.org>
* doc/rtl.texi (const_vector): Say that elements can be
const_wide_ints too.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index c00aece..b36c812 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1448,6 +1448,32 @@ element types.
@item vect_perm
Target supports vector permutation.
+@item vect_perm_byte
+Target supports permutation of vectors with 8-bit elements.
+
+@item vect_perm_short
+Target supports permutation of vectors with 16-bit elements.
+
+@item vect_perm3_byte
+Target supports permutation of vectors with 8-bit elements, and for the
+default vector length it is possible to permute:
+@example
+@{ a0, a1, a2, b0, b1, b2, @dots{} @}
+@end example
+to:
+@example
+@{ a0, a0, a0, b0, b0, b0, @dots{} @}
+@{ a1, a1, a1, b1, b1, b1, @dots{} @}
+@{ a2, a2, a2, b2, b2, b2, @dots{} @}
+@end example
+using only two-vector permutes, regardless of how long the sequence is.
+
+@item vect_perm3_int
+Like @code{vect_perm3_byte}, but for 32-bit elements.
+
+@item vect_perm3_short
+Like @code{vect_perm3_byte}, but for 16-bit elements.
+
@item vect_shift
Target supports a hardware vector shift operation.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4696df9..fcefc79 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -2,6 +2,24 @@
Alan Hayward <alan.hayward@arm.com>
David Sherwood <david.sherwood@arm.com>
+ * lib/target-supports.exp (vect_perm_supported): New proc.
+ (check_effective_target_vect_perm3_int): Likewise.
+ (check_effective_target_vect_perm3_short): Likewise.
+ (check_effective_target_vect_perm3_byte): Likewise.
+ * gcc.dg/vect/slp-perm-1.c: Expect SLP load permutation to
+ succeed if vect_perm3_int.
+ * gcc.dg/vect/slp-perm-5.c: Likewise.
+ * gcc.dg/vect/slp-perm-6.c: Likewise.
+ * gcc.dg/vect/slp-perm-7.c: Likewise.
+ * gcc.dg/vect/slp-perm-8.c: Likewise vect_perm3_byte.
+ * gcc.dg/vect/slp-perm-9.c: Likewise vect_perm3_short.
+ Use vect_perm_short instead of vect_perm. Add a scan-tree-dump-not
+ test for vect_perm3_short targets.
+
+2017-11-09 Richard Sandiford <richard.sandiford@linaro.org>
+ Alan Hayward <alan.hayward@arm.com>
+ David Sherwood <david.sherwood@arm.com>
+
* gcc.dg/vect/no-vfa-vect-101.c: Use scan-tree-dump rather than
scan-tree-dump-times for vect_multiple_sizes.
* gcc.dg/vect/no-vfa-vect-102.c: Likewise.
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
index ca832b0..6bd16ef 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-1.c
@@ -80,9 +80,9 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_int && {! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
index 0d2bf14..7e132e1 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-5.c
@@ -104,9 +104,9 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && {! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
index a697409..f97887f 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-6.c
@@ -103,8 +103,8 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_perm3_int && {! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
index 928ebfe..baf7f78 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-7.c
@@ -96,8 +96,8 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_perm } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_int && { ! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_int && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
index 915bba2..94d4455 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
@@ -60,8 +60,8 @@ int main (int argc, const char* argv[])
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_byte } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm_byte && {! vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_byte && { ! vect_load_lanes } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target vect_load_lanes } } } */
-/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm_byte && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump "note: Built SLP cancelled: can use load/store-lanes" "vect" { target { vect_perm3_byte && vect_load_lanes } } } } */
/* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } } } */
/* { dg-final { scan-tree-dump "STORE_LANES" "vect" { target vect_load_lanes } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c
index 4be200d..0b54b3b 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-9.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-9.c
@@ -57,9 +57,10 @@ int main (int argc, const char* argv[])
return 0;
}
-/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { {! vect_perm } || {! vect_sizes_16B_8B } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_perm } && { vect_sizes_16B_8B } } } } } */
-/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target vect_perm_short } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { {! vect_perm } || {! vect_sizes_32B_16B } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { vect_perm } && { vect_sizes_32B_16B } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_short || vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target { vect_perm_short && { ! vect_perm3_short } } } } } */
+/* { dg-final { scan-tree-dump-not "permutation requires at least three vectors" "vect" { target vect_perm3_short } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { { ! vect_perm3_short } || vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_short && { ! vect_load_lanes } } } } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 8e7b641..8043d24 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -5547,6 +5547,78 @@ proc check_effective_target_vect_perm { } {
return $et_vect_perm_saved($et_index)
}
+# Return 1 if, for some VF:
+#
+# - the target's default vector size is VF * ELEMENT_BITS bits
+#
+# - it is possible to implement the equivalent of:
+#
+# int<ELEMENT_BITS>_t s1[COUNT][COUNT * VF], s2[COUNT * VF];
+# for (int i = 0; i < COUNT; ++i)
+# for (int j = 0; j < COUNT * VF; ++j)
+# s1[i][j] = s2[j - j % COUNT + i]
+#
+# using only a single 2-vector permute for each vector in s1.
+#
+# E.g. for COUNT == 3 and vector length 4, the two arrays would be:
+#
+# s2 | a0 a1 a2 a3 | b0 b1 b2 b3 | c0 c1 c2 c3
+# ------+-------------+-------------+------------
+# s1[0] | a0 a0 a0 a3 | a3 a3 b2 b2 | b2 c1 c1 c1
+# s1[1] | a1 a1 a1 b0 | b0 b0 b3 b3 | b3 c2 c2 c2
+# s1[2] | a2 a2 a2 b1 | b1 b1 c0 c0 | c0 c3 c3 c3
+#
+# Each s1 permute requires only two of a, b and c.
+#
+# The distance between the start of vector n in s1[0] and the start
+# of vector n in s2 is:
+#
+# A = (n * VF) % COUNT
+#
+# The corresponding value for the end of vector n is:
+#
+# B = (n * VF + VF - 1) % COUNT
+#
+# Subtracting i from each value gives the corresponding difference
+# for s1[i]. The condition being tested by this function is false
+# iff A - i > 0 and B - i < 0 for some i and n, such that the first
+# element for s1[i] comes from vector n - 1 of s2 and the last element
+# comes from vector n + 1 of s2. The condition is therefore true iff
+# A <= B for all n. This is turn means the condition is true iff:
+#
+# (n * VF) % COUNT + (VF - 1) % COUNT < COUNT
+#
+# for all n. COUNT - (n * VF) % COUNT is bounded by gcd (VF, COUNT),
+# and will be that value for at least one n in [0, COUNT), so we want:
+#
+# (VF - 1) % COUNT < gcd (VF, COUNT)
+
+proc vect_perm_supported { count element_bits } {
+ set vector_bits [lindex [available_vector_sizes] 0]
+ if { $vector_bits <= 0 } {
+ return 0
+ }
+ set vf [expr { $vector_bits / $element_bits }]
+
+ # Compute gcd (VF, COUNT).
+ set gcd $vf
+ set temp1 $count
+ while { $temp1 > 0 } {
+ set temp2 [expr { $gcd % $temp1 }]
+ set gcd $temp1
+ set temp1 $temp2
+ }
+ return [expr { ($vf - 1) % $count < $gcd }]
+}
+
+# Return 1 if the target supports SLP permutation of 3 vectors when each
+# element has 32 bits.
+
+proc check_effective_target_vect_perm3_int { } {
+ return [expr { [check_effective_target_vect_perm]
+ && [vect_perm_supported 3 32] }]
+}
+
# Return 1 if the target plus current options supports vector permutation
# on byte-sized elements, 0 otherwise.
#
@@ -5578,6 +5650,14 @@ proc check_effective_target_vect_perm_byte { } {
return $et_vect_perm_byte_saved($et_index)
}
+# Return 1 if the target supports SLP permutation of 3 vectors when each
+# element has 8 bits.
+
+proc check_effective_target_vect_perm3_byte { } {
+ return [expr { [check_effective_target_vect_perm_byte]
+ && [vect_perm_supported 3 8] }]
+}
+
# Return 1 if the target plus current options supports vector permutation
# on short-sized elements, 0 otherwise.
#
@@ -5609,6 +5689,14 @@ proc check_effective_target_vect_perm_short { } {
return $et_vect_perm_short_saved($et_index)
}
+# Return 1 if the target supports SLP permutation of 3 vectors when each
+# element has 16 bits.
+
+proc check_effective_target_vect_perm3_short { } {
+ return [expr { [check_effective_target_vect_perm_short]
+ && [vect_perm_supported 3 16] }]
+}
+
# Return 1 if the target plus current options supports folding of
# copysign into XORSIGN.
#