aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386.cc43
-rw-r--r--gcc/testsuite/gcc.target/i386/pr119919.c13
2 files changed, 48 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 3b4dfd9..78df3d9 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -25375,14 +25375,32 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
case COND_EXPR:
{
/* SSE2 conditinal move sequence is:
- pcmpgtd %xmm5, %xmm0
+ pcmpgtd %xmm5, %xmm0 (accounted separately)
pand %xmm0, %xmm2
pandn %xmm1, %xmm0
por %xmm2, %xmm0
while SSE4 uses cmp + blend
- and AVX512 masked moves. */
-
- int ninsns = TARGET_SSE4_1 ? 2 : 4;
+ and AVX512 masked moves.
+
+ The condition is accounted separately since we usually have
+ p = a < b
+ c = p ? x : y
+ and we will account first statement as setcc. Exception is when
+ p is loaded from memory as bool and then we will not account
+ the compare, but there is no way to check for this. */
+
+ int ninsns = TARGET_SSE4_1 ? 1 : 3;
+
+ /* If one of parameters is 0 or -1 the sequence will be simplified:
+ (if_true & mask) | (if_false & ~mask) -> if_true & mask */
+ if (ninsns > 1
+ && (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || zerop (gimple_assign_rhs3 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs3 (stmt_info->stmt))))
+ ninsns = 1;
if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode))
stmt_cost = ninsns * ix86_cost->sse_op;
@@ -25393,8 +25411,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
else if (VECTOR_MODE_P (mode))
stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op);
else
- /* compare + cmov. */
- stmt_cost = ix86_cost->add * 2;
+ /* compare (accounted separately) + cmov. */
+ stmt_cost = ix86_cost->add;
}
break;
@@ -25416,9 +25434,18 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
{
stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op);
/* vpmin was introduced in SSE3.
- SSE2 needs pcmpgtd + pand + pandn + pxor. */
+ SSE2 needs pcmpgtd + pand + pandn + pxor.
+ If one of parameters is 0 or -1 the sequence is simplified
+ to pcmpgtd + pand. */
if (!TARGET_SSSE3)
- stmt_cost *= 4;
+ {
+ if (zerop (gimple_assign_rhs2 (stmt_info->stmt))
+ || integer_minus_onep
+ (gimple_assign_rhs2 (stmt_info->stmt)))
+ stmt_cost *= 2;
+ else
+ stmt_cost *= 4;
+ }
}
else
/* cmp + cmov. */
diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c b/gcc/testsuite/gcc.target/i386/pr119919.c
new file mode 100644
index 0000000..ed64656
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr119919.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */
+int a[9*9];
+bool b[9];
+void test()
+{
+ for (int i = 0; i < 9; i++)
+ {
+ b[i] = a[i*9] != 0;
+ }
+}
+
+/* { dg-final { scan-tree-dump "loop vectorized using 8 byte vectors" "vect" } } */