Diffstat (limited to 'gcc/tree-vect-patterns.c')
-rw-r--r-- | gcc/tree-vect-patterns.c | 187
1 file changed, 126 insertions(+), 61 deletions(-)
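The diff below teaches the pattern recognizer to form WIDEN_PLUS_EXPR and WIDEN_MINUS_EXPR and reworks how boolean mask precisions are computed (now also covering PHIs). As a rough illustration only — this loop is not part of the patch and the function name is made up — the new vect_recog_widen_plus_pattern and vect_recog_widen_minus_pattern target additions and subtractions whose operands are widened from a narrower type:

#include <stdint.h>

/* Illustrative sketch: the uint8_t inputs are promoted to uint16_t before
   the add/sub, which is the "PLUS_EXPR/MINUS_EXPR on widened inputs" shape
   the new patterns look for.  */
void
widen_add_sub (uint16_t *restrict sum, uint16_t *restrict dif,
               const uint8_t *restrict a, const uint8_t *restrict b, int n)
{
  for (int i = 0; i < n; ++i)
    {
      sum[i] = (uint16_t) a[i] + (uint16_t) b[i];
      dif[i] = (uint16_t) a[i] - (uint16_t) b[i];
    }
}

On targets with widening vector add/subtract instructions, recognizing this shape lets the vectorizer use them directly instead of unpacking the narrow inputs first.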
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index ac56ace..ff1358a 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1148,7 +1148,7 @@ vect_recog_sad_pattern (vec_info *vinfo,
   /* FORNOW.  Can continue analyzing the def-use chain when this stmt in a phi
      inside the loop (in case we are analyzing an outer-loop).  */
   vect_unpromoted_value unprom[2];
-  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
+  if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR,
                              false, 2, unprom, &half_type))
     return NULL;
 
@@ -1262,6 +1262,29 @@ vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
                                       "vect_recog_widen_mult_pattern");
 }
 
+/* Try to detect addition on widened inputs, converting PLUS_EXPR
+   to WIDEN_PLUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
+
+static gimple *
+vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
+                               tree *type_out)
+{
+  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
+                                      PLUS_EXPR, WIDEN_PLUS_EXPR, false,
+                                      "vect_recog_widen_plus_pattern");
+}
+
+/* Try to detect subtraction on widened inputs, converting MINUS_EXPR
+   to WIDEN_MINUS_EXPR.  See vect_recog_widen_op_pattern for details.  */
+static gimple *
+vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info,
+                                tree *type_out)
+{
+  return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out,
+                                      MINUS_EXPR, WIDEN_MINUS_EXPR, false,
+                                      "vect_recog_widen_minus_pattern");
+}
+
 /* Function vect_recog_pow_pattern
 
    Try to find the following pattern:
@@ -1978,7 +2001,7 @@ vect_recog_average_pattern (vec_info *vinfo,
   vect_unpromoted_value unprom[3];
   tree new_type;
   unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR,
-                                            PLUS_EXPR, false, 3,
+                                            WIDEN_PLUS_EXPR, false, 3,
                                             unprom, &new_type);
   if (nops == 0)
     return NULL;
@@ -4044,7 +4067,7 @@ vect_recog_bool_pattern (vec_info *vinfo,
            || rhs_code == VIEW_CONVERT_EXPR)
     {
       if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-          || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
+          || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
         return NULL;
       vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
       if (vectype == NULL_TREE)
@@ -5007,6 +5030,8 @@ possible_vector_mask_operation_p (stmt_vec_info stmt_info)
           return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
         }
     }
+  else if (is_a <gphi *> (stmt_info->stmt))
+    return true;
   return false;
 }
 
@@ -5017,47 +5042,40 @@ possible_vector_mask_operation_p (stmt_vec_info stmt_info)
 static void
 vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
 {
-  if (!possible_vector_mask_operation_p (stmt_info)
-      || stmt_info->mask_precision)
+  if (!possible_vector_mask_operation_p (stmt_info))
     return;
 
-  auto_vec<stmt_vec_info, 32> worklist;
-  worklist.quick_push (stmt_info);
-  while (!worklist.is_empty ())
-    {
-      stmt_info = worklist.last ();
-      unsigned int orig_length = worklist.length ();
-
-      /* If at least one boolean input uses a vector mask type,
-         pick the mask type with the narrowest elements.
+  /* If at least one boolean input uses a vector mask type,
+     pick the mask type with the narrowest elements.
 
-         ??? This is the traditional behavior.  It should always produce
-         the smallest number of operations, but isn't necessarily the
-         optimal choice.  For example, if we have:
+     ??? This is the traditional behavior.  It should always produce
+     the smallest number of operations, but isn't necessarily the
+     optimal choice.  For example, if we have:
 
-           a = b & c
+       a = b & c
 
-         where:
+     where:
 
-         - the user of a wants it to have a mask type for 16-bit elements (M16)
-         - b also uses M16
-         - c uses a mask type for 8-bit elements (M8)
+     - the user of a wants it to have a mask type for 16-bit elements (M16)
+     - b also uses M16
+     - c uses a mask type for 8-bit elements (M8)
 
-         then picking M8 gives:
+     then picking M8 gives:
 
-         - 1 M16->M8 pack for b
-         - 1 M8 AND for a
-         - 2 M8->M16 unpacks for the user of a
+     - 1 M16->M8 pack for b
+     - 1 M8 AND for a
+     - 2 M8->M16 unpacks for the user of a
 
-         whereas picking M16 would have given:
+     whereas picking M16 would have given:
 
-         - 2 M8->M16 unpacks for c
-         - 2 M16 ANDs for a
+     - 2 M8->M16 unpacks for c
+     - 2 M16 ANDs for a
 
-         The number of operations are equal, but M16 would have given
-         a shorter dependency chain and allowed more ILP.  */
-      unsigned int precision = ~0U;
-      gassign *assign = as_a <gassign *> (stmt_info->stmt);
+     The number of operations are equal, but M16 would have given
+     a shorter dependency chain and allowed more ILP.  */
+  unsigned int precision = ~0U;
+  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
+    {
       unsigned int nops = gimple_num_ops (assign);
       for (unsigned int i = 1; i < nops; ++i)
         {
@@ -5076,14 +5094,8 @@ vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
               if (precision > def_stmt_info->mask_precision)
                 precision = def_stmt_info->mask_precision;
             }
-          else if (possible_vector_mask_operation_p (def_stmt_info))
-            worklist.safe_push (def_stmt_info);
         }
 
-      /* Defer the choice if we need to visit operands first.  */
-      if (orig_length != worklist.length ())
-        continue;
-
       /* If the statement compares two values that shouldn't use vector masks,
          try comparing the values as normal scalars instead.  */
       tree_code rhs_code = gimple_assign_rhs_code (assign);
@@ -5099,22 +5111,41 @@ vect_determine_mask_precision (vec_info *vinfo, stmt_vec_info stmt_info)
               && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
             precision = GET_MODE_BITSIZE (mode);
         }
-
-      if (dump_enabled_p ())
+    }
+  else
+    {
+      gphi *phi = as_a <gphi *> (stmt_info->stmt);
+      for (unsigned i = 0; i < gimple_phi_num_args (phi); ++i)
         {
-          if (precision == ~0U)
-            dump_printf_loc (MSG_NOTE, vect_location,
-                             "using normal nonmask vectors for %G",
-                             stmt_info->stmt);
-          else
-            dump_printf_loc (MSG_NOTE, vect_location,
-                             "using boolean precision %d for %G",
-                             precision, stmt_info->stmt);
+          tree rhs = gimple_phi_arg_def (phi, i);
+
+          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
+          if (!def_stmt_info)
+            /* Don't let external or constant operands influence the choice.
+               We can convert them to whichever vector type we pick.  */
+            continue;
+
+          if (def_stmt_info->mask_precision)
+            {
+              if (precision > def_stmt_info->mask_precision)
+                precision = def_stmt_info->mask_precision;
+            }
         }
+    }
 
-      stmt_info->mask_precision = precision;
-      worklist.pop ();
+  if (dump_enabled_p ())
+    {
+      if (precision == ~0U)
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "using normal nonmask vectors for %G",
+                         stmt_info->stmt);
+      else
+        dump_printf_loc (MSG_NOTE, vect_location,
+                         "using boolean precision %d for %G",
+                         precision, stmt_info->stmt);
     }
+
+  stmt_info->mask_precision = precision;
 }
 
 /* Handle vect_determine_precisions for STMT_INFO, given that we
@@ -5129,7 +5160,6 @@ vect_determine_stmt_precisions (vec_info *vinfo, stmt_vec_info stmt_info)
       vect_determine_precisions_from_range (stmt_info, stmt);
       vect_determine_precisions_from_users (stmt_info, stmt);
     }
-  vect_determine_mask_precision (vinfo, stmt_info);
 }
 
 /* Walk backwards through the vectorizable region to determine the
@@ -5153,6 +5183,14 @@ vect_determine_precisions (vec_info *vinfo)
 
       for (unsigned int i = 0; i < nbbs; i++)
         {
+          basic_block bb = bbs[i];
+          for (auto si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+            if (!is_gimple_debug (gsi_stmt (si)))
+              vect_determine_mask_precision
+                (vinfo, vinfo->lookup_stmt (gsi_stmt (si)));
+        }
+      for (unsigned int i = 0; i < nbbs; i++)
+        {
           basic_block bb = bbs[nbbs - i - 1];
           for (gimple_stmt_iterator si = gsi_last_bb (bb);
                !gsi_end_p (si); gsi_prev (&si))
@@ -5164,14 +5202,39 @@ vect_determine_precisions (vec_info *vinfo)
   else
     {
       bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
+      for (unsigned i = 0; i < bb_vinfo->bbs.length (); ++i)
+        {
+          basic_block bb = bb_vinfo->bbs[i];
+          for (auto gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+            {
+              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
+              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+                vect_determine_mask_precision (vinfo, stmt_info);
+            }
+          for (auto gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+            {
+              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
+              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+                vect_determine_mask_precision (vinfo, stmt_info);
+            }
+        }
       for (int i = bb_vinfo->bbs.length () - 1; i != -1; --i)
-        for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
-             !gsi_end_p (gsi); gsi_prev (&gsi))
-          {
-            stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
-            if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
-              vect_determine_stmt_precisions (vinfo, stmt_info);
-          }
+        {
+          for (gimple_stmt_iterator gsi = gsi_last_bb (bb_vinfo->bbs[i]);
+               !gsi_end_p (gsi); gsi_prev (&gsi))
+            {
+              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (gsi));
+              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+                vect_determine_stmt_precisions (vinfo, stmt_info);
+            }
+          for (auto gsi = gsi_start_phis (bb_vinfo->bbs[i]);
+               !gsi_end_p (gsi); gsi_next (&gsi))
+            {
+              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi.phi ());
+              if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
+                vect_determine_stmt_precisions (vinfo, stmt_info);
+            }
+        }
     }
 }
 
@@ -5209,14 +5272,16 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
      of mask conversion that are needed for gather and scatter
      internal functions.  */
   { vect_recog_gather_scatter_pattern, "gather_scatter" },
-  { vect_recog_mask_conversion_pattern, "mask_conversion" }
+  { vect_recog_mask_conversion_pattern, "mask_conversion" },
+  { vect_recog_widen_plus_pattern, "widen_plus" },
+  { vect_recog_widen_minus_pattern, "widen_minus" },
 };
 
 const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
 
 /* Mark statements that are involved in a pattern. */
 
-static inline void
+void
 vect_mark_pattern_stmts (vec_info *vinfo, stmt_vec_info orig_stmt_info,
                          gimple *pattern_stmt, tree pattern_vectype)
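For the first hunk, which makes vect_recog_sad_pattern pass WIDEN_MINUS_EXPR as the extra operation code to vect_widened_op_tree, a sketch of the kind of reduction that pattern targets may help. This is an assumed example, not taken from the patch or its testsuite:

#include <stdint.h>
#include <stdlib.h>

/* Illustrative sum-of-absolute-differences kernel: the inner subtraction
   operates on values widened from uint8_t, which is the def-use shape
   vect_widened_op_tree is asked to match (now also as WIDEN_MINUS_EXPR).  */
int
sad (const uint8_t *a, const uint8_t *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; ++i)
    sum += abs ((int) a[i] - (int) b[i]);
  return sum;
}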