author     Tamar Christina <tamar.christina@arm.com>    2024-07-22 10:26:14 +0100
committer  Tamar Christina <tamar.christina@arm.com>    2024-07-22 10:26:21 +0100
commit     af792f0226e479b165a49de5e8f9e1d16a4b26c0 (patch)
tree       94bdb93cae4b018f427674d90186f2d0a77e22dd /gcc/tree-vect-patterns.cc
parent     913bab282d95e842907fec5a552a74ef64a6d4f6 (diff)
middle-end: Implement conditional store vectorizer pattern [PR115531]
This adds a conditional store optimization for the vectorizer as a pattern.
The vectorizer already supports modifying memory accesses because of the
pattern based gather/scatter recognition.

Doing it in the vectorizer allows us to still keep the ability to vectorize
such loops for architectures that don't have MASK_STORE support, whereas
doing this in ifcvt makes us commit to MASK_STORE.

Concretely for this loop:

void foo1 (char *restrict a, int *restrict b, int *restrict c,
           int n, int stride)
{
  if (stride <= 1)
    return;

  for (int i = 0; i < n; i++)
    {
      int res = c[i];
      int t = b[i+stride];
      if (a[i] != 0)
        res = t;
      c[i] = res;
    }
}

today we generate:

.L3:
        ld1b    z29.s, p7/z, [x0, x5]
        ld1w    z31.s, p7/z, [x2, x5, lsl 2]
        ld1w    z30.s, p7/z, [x1, x5, lsl 2]
        cmpne   p15.b, p6/z, z29.b, #0
        sel     z30.s, p15, z30.s, z31.s
        st1w    z30.s, p7, [x2, x5, lsl 2]
        add     x5, x5, x4
        whilelo p7.s, w5, w3
        b.any   .L3

which in gimple is:

  vect_res_18.9_68 = .MASK_LOAD (vectp_c.7_65, 32B, loop_mask_67);
  vect_t_20.12_74 = .MASK_LOAD (vectp.10_72, 32B, loop_mask_67);
  vect__9.15_77 = .MASK_LOAD (vectp_a.13_75, 8B, loop_mask_67);
  mask__34.16_79 = vect__9.15_77 != { 0, ... };
  vect_res_11.17_80 = VEC_COND_EXPR <mask__34.16_79, vect_t_20.12_74, vect_res_18.9_68>;
  .MASK_STORE (vectp_c.18_81, 32B, loop_mask_67, vect_res_11.17_80);

A MASK_STORE is already conditional, so there's no need to perform the load
of the old values and the VEC_COND_EXPR.  This patch makes it so we generate:

  vect_res_18.9_68 = .MASK_LOAD (vectp_c.7_65, 32B, loop_mask_67);
  vect__9.15_77 = .MASK_LOAD (vectp_a.13_75, 8B, loop_mask_67);
  mask__34.16_79 = vect__9.15_77 != { 0, ... };
  .MASK_STORE (vectp_c.18_81, 32B, mask__34.16_79, vect_res_18.9_68);

which generates:

.L3:
        ld1b    z30.s, p7/z, [x0, x5]
        ld1w    z31.s, p7/z, [x1, x5, lsl 2]
        cmpne   p7.b, p7/z, z30.b, #0
        st1w    z31.s, p7, [x2, x5, lsl 2]
        add     x5, x5, x4
        whilelo p7.s, w5, w3
        b.any   .L3

gcc/ChangeLog:

        PR tree-optimization/115531
        * tree-vect-patterns.cc (vect_cond_store_pattern_same_ref): New.
        (vect_recog_cond_store_pattern): New.
        (vect_vect_recog_func_ptrs): Use it.
        * target.def (conditional_operation_is_expensive): New.
        * doc/tm.texi: Regenerate.
        * doc/tm.texi.in: Document it.
        * targhooks.cc (default_conditional_operation_is_expensive): New.
        * targhooks.h (default_conditional_operation_is_expensive): New.
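For illustration only, here is a hypothetical variant (not taken from the patch
or its testsuite) where the loaded value ends up in the true arm of the select.
Depending on how if-conversion canonicalizes the condition, this is the case the
new pattern handles by negating the mask with a BIT_XOR_EXPR before using it in
the MASK_STORE:

/* Hypothetical example, not part of this patch: the load of c[i] is the
   true arm of the select, so vect_recog_cond_store_pattern would set
   "invert" and flip the comparison result before the masked store.  */
void foo2 (char *restrict a, int *restrict b, int *restrict c, int n)
{
  for (int i = 0; i < n; i++)
    {
      int res = b[i];
      if (a[i] != 0)
        res = c[i];    /* keep the old value where a[i] != 0 */
      c[i] = res;
    }
}

The masked store then writes b[i] to c[i] only in the lanes where a[i] == 0,
leaving all other lanes untouched.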
Diffstat (limited to 'gcc/tree-vect-patterns.cc')
-rw-r--r--    gcc/tree-vect-patterns.cc    159
1 file changed, 159 insertions, 0 deletions
diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc
index 4570c25..b0821c7 100644
--- a/gcc/tree-vect-patterns.cc
+++ b/gcc/tree-vect-patterns.cc
@@ -51,8 +51,10 @@ along with GCC; see the file COPYING3. If not see
#include "omp-simd-clone.h"
#include "predict.h"
#include "tree-vector-builder.h"
+#include "tree-ssa-loop-ivopts.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
+#include "alias.h"
/* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
@@ -6526,6 +6528,162 @@ vect_recog_gather_scatter_pattern (vec_info *vinfo,
  return pattern_stmt;
}
+/* Helper method of vect_recog_cond_store_pattern; checks whether COND_ARG
+   is defined by a load statement that reads the same data as that of
+   STORE_VINFO.  */
+
+static bool
+vect_cond_store_pattern_same_ref (vec_info *vinfo,
+                                  stmt_vec_info store_vinfo, tree cond_arg)
+{
+  stmt_vec_info load_stmt_vinfo = vinfo->lookup_def (cond_arg);
+  if (!load_stmt_vinfo
+      || !STMT_VINFO_DATA_REF (load_stmt_vinfo)
+      || DR_IS_WRITE (STMT_VINFO_DATA_REF (load_stmt_vinfo))
+      || !same_data_refs (STMT_VINFO_DATA_REF (store_vinfo),
+                          STMT_VINFO_DATA_REF (load_stmt_vinfo)))
+    return false;
+
+  return true;
+}
+
+/* Function vect_recog_cond_store_pattern
+
+   Try to find the following pattern:
+
+   x = *_3;
+   c = a CMP b;
+   y = c ? t_20 : x;
+   *_3 = y;
+
+   where the store to *_3 is a conditional select of a value loaded from the
+   same location.  In such a case we can elide the initial load if MASK_STORE
+   is supported and instead only conditionally write out the result.
+
+   The pattern produces for the above:
+
+   c = a CMP b;
+   .MASK_STORE (_3, c, t_20)
+
+   Input:
+
+   * STMT_VINFO: The stmt from which the pattern search begins.  In the
+   example this is the store *_3 = y.
+
+   Output:
+
+   * TYPE_OUT: The type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the sequence.  */
+
+static gimple *
+vect_recog_cond_store_pattern (vec_info *vinfo,
+                               stmt_vec_info stmt_vinfo, tree *type_out)
+{
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
+  if (!loop_vinfo)
+    return NULL;
+
+  gimple *store_stmt = STMT_VINFO_STMT (stmt_vinfo);
+
+  /* Needs to be a gimple store for which we have DR info.  */
+  if (!STMT_VINFO_DATA_REF (stmt_vinfo)
+      || DR_IS_READ (STMT_VINFO_DATA_REF (stmt_vinfo))
+      || !gimple_store_p (store_stmt))
+    return NULL;
+
+  tree st_rhs = gimple_assign_rhs1 (store_stmt);
+
+  if (TREE_CODE (st_rhs) != SSA_NAME)
+    return NULL;
+
+  gassign *cond_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (st_rhs));
+  if (!cond_stmt || gimple_assign_rhs_code (cond_stmt) != COND_EXPR)
+    return NULL;
+
+  /* Check if the else value matches the original loaded one.  */
+  bool invert = false;
+  tree cmp_ls = gimple_arg (cond_stmt, 0);
+  tree cond_arg1 = gimple_arg (cond_stmt, 1);
+  tree cond_arg2 = gimple_arg (cond_stmt, 2);
+
+  if (!vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo, cond_arg2)
+      && !(invert = vect_cond_store_pattern_same_ref (vinfo, stmt_vinfo,
+                                                      cond_arg1)))
+    return NULL;
+
+  vect_pattern_detected ("vect_recog_cond_store_pattern", store_stmt);
+
+  tree scalar_type = TREE_TYPE (st_rhs);
+  if (VECTOR_TYPE_P (scalar_type))
+    return NULL;
+
+  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  machine_mode mask_mode;
+  machine_mode vecmode = TYPE_MODE (vectype);
+  if (targetm.vectorize.conditional_operation_is_expensive (IFN_MASK_STORE)
+      || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
+      || !can_vec_mask_load_store_p (vecmode, mask_mode, false))
+    return NULL;
+
+  tree base = DR_REF (STMT_VINFO_DATA_REF (stmt_vinfo));
+  if (may_be_nonaddressable_p (base))
+    return NULL;
+
+  /* The arm of the select that matched the load is elided; the other arm
+     is the value we need to store.  */
+  tree cond_store_arg = invert ? cond_arg2 : cond_arg1;
+  tree cond_load_arg = invert ? cond_arg1 : cond_arg2;
+  gimple *load_stmt = SSA_NAME_DEF_STMT (cond_load_arg);
+
+  /* This is a rough estimation to check that there aren't any aliasing stores
+     in between the load and store.  It's a bit strict, but for now it's good
+     enough.  */
+  if (gimple_vuse (load_stmt) != gimple_vuse (store_stmt))
+    return NULL;
+
+  /* If we have to invert the condition, i.e. use the true argument rather
+     than the false argument, we have to negate the mask.  */
+  if (invert)
+    {
+      tree var = vect_recog_temp_ssa_var (boolean_type_node, NULL);
+
+      /* Invert the mask using ^ 1.  */
+      tree itype = TREE_TYPE (cmp_ls);
+      gassign *conv = gimple_build_assign (var, BIT_XOR_EXPR, cmp_ls,
+                                           build_int_cst (itype, 1));
+
+      tree mask_vec_type = get_mask_type_for_scalar_type (vinfo, itype);
+      append_pattern_def_seq (vinfo, stmt_vinfo, conv, mask_vec_type, itype);
+      cmp_ls = var;
+    }
+
+  if (TREE_CODE (base) != MEM_REF)
+    base = build_fold_addr_expr (base);
+
+  tree ptr = build_int_cst (reference_alias_ptr_type (base),
+                            get_object_alignment (base));
+
+  /* Convert the mask to the right form.  */
+  tree mask = vect_convert_mask_for_vectype (cmp_ls, vectype, stmt_vinfo,
+                                             vinfo);
+
+  gcall *call
+    = gimple_build_call_internal (IFN_MASK_STORE, 4, base, ptr, mask,
+                                  cond_store_arg);
+  gimple_set_location (call, gimple_location (store_stmt));
+
+  /* Copy across relevant vectorization info and associate DR with the
+     new pattern statement instead of the original statement.  */
+  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (call);
+  loop_vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
+
+  *type_out = vectype;
+  return call;
+}
+
/* Return true if TYPE is a non-boolean integer type. These are the types
   that we want to consider for narrowing.  */
@@ -7191,6 +7349,7 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = {
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
+  { vect_recog_cond_store_pattern, "cond_store" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" },
  { vect_recog_widen_plus_pattern, "widen_plus" },
  { vect_recog_widen_minus_pattern, "widen_minus" },
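For intuition, a small standalone check (hypothetical, not part of the patch or
the GCC testsuite) of the scalar equivalence the pattern relies on: storing
"mask ? new : old" back to the same location leaves memory in the same state as
a store that writes "new" only where the mask is set, provided no other store
touches the location in between; that proviso is what the same_data_refs and
gimple_vuse checks above guard.

/* Hypothetical standalone check, not part of the patch: verifies that
   "load old, select, store" and "masked store of the new value" leave the
   destination array in the same state.  This is the equivalence the
   cond_store pattern exploits when it elides the load and VEC_COND_EXPR.  */
#include <assert.h>
#include <string.h>

static void
select_then_store (const char *mask, const int *new_vals, int *out, int n)
{
  for (int i = 0; i < n; i++)
    {
      int old = out[i];                     /* the load the pattern elides */
      out[i] = mask[i] ? new_vals[i] : old;
    }
}

static void
masked_store (const char *mask, const int *new_vals, int *out, int n)
{
  for (int i = 0; i < n; i++)
    if (mask[i])                            /* only selected lanes are written */
      out[i] = new_vals[i];
}

int
main (void)
{
  const char mask[8] = { 1, 0, 1, 1, 0, 0, 1, 0 };
  const int new_vals[8] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  int a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int b[8];

  memcpy (b, a, sizeof a);
  select_then_store (mask, new_vals, a, 8);
  masked_store (mask, new_vals, b, 8);
  assert (memcmp (a, b, sizeof a) == 0);
  return 0;
}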