diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/i386.cc | 43 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr119919.c | 13 |
2 files changed, 48 insertions, 8 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 3b4dfd9..78df3d9 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -25375,14 +25375,32 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, case COND_EXPR: { /* SSE2 conditinal move sequence is: - pcmpgtd %xmm5, %xmm0 + pcmpgtd %xmm5, %xmm0 (accounted separately) pand %xmm0, %xmm2 pandn %xmm1, %xmm0 por %xmm2, %xmm0 while SSE4 uses cmp + blend - and AVX512 masked moves. */ - - int ninsns = TARGET_SSE4_1 ? 2 : 4; + and AVX512 masked moves. + + The condition is accounted separately since we usually have + p = a < b + c = p ? x : y + and we will account first statement as setcc. Exception is when + p is loaded from memory as bool and then we will not account + the compare, but there is no way to check for this. */ + + int ninsns = TARGET_SSE4_1 ? 1 : 3; + + /* If one of parameters is 0 or -1 the sequence will be simplified: + (if_true & mask) | (if_false & ~mask) -> if_true & mask */ + if (ninsns > 1 + && (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || zerop (gimple_assign_rhs3 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs3 (stmt_info->stmt)))) + ninsns = 1; if (SSE_FLOAT_MODE_SSEMATH_OR_HFBF_P (mode)) stmt_cost = ninsns * ix86_cost->sse_op; @@ -25393,8 +25411,8 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, else if (VECTOR_MODE_P (mode)) stmt_cost = ix86_vec_cost (mode, ninsns * ix86_cost->sse_op); else - /* compare + cmov. */ - stmt_cost = ix86_cost->add * 2; + /* compare (accounted separately) + cmov. */ + stmt_cost = ix86_cost->add; } break; @@ -25416,9 +25434,18 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, { stmt_cost = ix86_vec_cost (mode, ix86_cost->sse_op); /* vpmin was introduced in SSE3. - SSE2 needs pcmpgtd + pand + pandn + pxor. */ + SSE2 needs pcmpgtd + pand + pandn + pxor. 
+ If one of parameters is 0 or -1 the sequence is simplified + to pcmpgtd + pand. */ if (!TARGET_SSSE3) - stmt_cost *= 4; + { + if (zerop (gimple_assign_rhs2 (stmt_info->stmt)) + || integer_minus_onep + (gimple_assign_rhs2 (stmt_info->stmt))) + stmt_cost *= 2; + else + stmt_cost *= 4; + } } else /* cmp + cmov. */ diff --git a/gcc/testsuite/gcc.target/i386/pr119919.c b/gcc/testsuite/gcc.target/i386/pr119919.c new file mode 100644 index 0000000..ed64656 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr119919.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -msse2 -fdump-tree-vect-details" } */ +int a[9*9]; +bool b[9]; +void test() +{ + for (int i = 0; i < 9; i++) + { + b[i] = a[i*9] != 0; + } +} + +/* { dg-final { scan-tree-dump "loop vectorized using 8 byte vectors" "vect" } } */ |