aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author: Bill Schmidt <wschmidt@linux.vnet.ibm.com> 2016-08-12 15:23:34 +0000
committer: William Schmidt <wschmidt@gcc.gnu.org> 2016-08-12 15:23:34 +0000
commit: 42b5ebf32cc8da61acd04925d8736c7827549dfa (patch)
tree: bf06d0240cfb2afe03fadf214b8cbd141e4c1fb6 /gcc
parent: 8eb414aa6c71225d941e24572d5cdd9ce6feee50 (diff)
download: gcc-42b5ebf32cc8da61acd04925d8736c7827549dfa.zip
gcc-42b5ebf32cc8da61acd04925d8736c7827549dfa.tar.gz
gcc-42b5ebf32cc8da61acd04925d8736c7827549dfa.tar.bz2
rs6000.c (rs6000_builtin_vectorization_cost): Correct costs for vec_construct.
2016-08-12  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost):
	Correct costs for vec_construct.

From-SVN: r239417
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog | 5
-rw-r--r-- gcc/config/rs6000/rs6000.c | 14
2 files changed, 14 insertions, 5 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e0dab59..49f0b6d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-08-12 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost):
+ Correct costs for vec_construct.
+
2016-08-12 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/69848
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 756e69d..e58ede1 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5266,16 +5266,20 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
return 2;
case vec_construct:
- elements = TYPE_VECTOR_SUBPARTS (vectype);
+ /* This is a rough approximation assuming non-constant elements
+ constructed into a vector via element insertion. FIXME:
+ vec_construct is not granular enough for uniformly good
+ decisions. If the initialization is a splat, this is
+ cheaper than we estimate. Improve this someday. */
elem_type = TREE_TYPE (vectype);
/* 32-bit vectors loaded into registers are stored as double
- precision, so we need n/2 converts in addition to the usual
- n/2 merges to construct a vector of short floats from them. */
+ precision, so we need 2 permutes, 2 converts, and 1 merge
+ to construct a vector of short floats from them. */
if (SCALAR_FLOAT_TYPE_P (elem_type)
&& TYPE_PRECISION (elem_type) == 32)
- return elements + 1;
+ return 5;
else
- return elements / 2 + 1;
+ return max (2, TYPE_VECTOR_SUBPARTS (vectype) - 1);
default:
gcc_unreachable ();