aboutsummaryrefslogtreecommitdiff
path: root/gcc/tree-vect-patterns.c
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2019-10-02 12:18:50 +0200
committer Jakub Jelinek <jakub@gcc.gnu.org> 2019-10-02 12:18:50 +0200
commit 9ff9a0a5e6edd8729f559bf86ca06f781c4da246 (patch)
tree 674edce34be00804e4173059cfd92b57f8f34aff /gcc/tree-vect-patterns.c
parent 291fa23ac04e317877c1e102937532f080180bb2 (diff)
download gcc-9ff9a0a5e6edd8729f559bf86ca06f781c4da246.zip
gcc-9ff9a0a5e6edd8729f559bf86ca06f781c4da246.tar.gz
gcc-9ff9a0a5e6edd8729f559bf86ca06f781c4da246.tar.bz2
re PR tree-optimization/91940 (__builtin_bswap16 loop optimization)
PR tree-optimization/91940
* tree-vect-patterns.c: Include tree-vector-builder.h and vec-perm-indices.h.
(vect_recog_rotate_pattern): Also handle __builtin_bswap16, either by unpromoting the argument back to uint16_t, or by converting into a rotate, or into shifts plus ior.

* gcc.dg/vect/vect-bswap16.c: Add -msse4 on x86, run on all targets, expect vectorized 1 loops message on both vect_bswap and sse4_runtime targets.
* gcc.dg/vect/vect-bswap16a.c: New test.

From-SVN: r276442
Diffstat (limited to 'gcc/tree-vect-patterns.c')
-rw-r--r-- gcc/tree-vect-patterns.c 148
1 file changed, 133 insertions, 15 deletions
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 4dfebbe..09db74b 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
+#include "tree-vector-builder.h"
+#include "vec-perm-indices.h"
/* Return true if we have a useful VR_RANGE range for VAR, storing it
in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
@@ -2168,24 +2170,107 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
enum vect_def_type dt;
optab optab1, optab2;
edge ext_def = NULL;
+ bool bswap16_p = false;
- if (!is_gimple_assign (last_stmt))
- return NULL;
+ if (is_gimple_assign (last_stmt))
+ {
+ rhs_code = gimple_assign_rhs_code (last_stmt);
+ switch (rhs_code)
+ {
+ case LROTATE_EXPR:
+ case RROTATE_EXPR:
+ break;
+ default:
+ return NULL;
+ }
- rhs_code = gimple_assign_rhs_code (last_stmt);
- switch (rhs_code)
+ lhs = gimple_assign_lhs (last_stmt);
+ oprnd0 = gimple_assign_rhs1 (last_stmt);
+ type = TREE_TYPE (oprnd0);
+ oprnd1 = gimple_assign_rhs2 (last_stmt);
+ }
+ else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
{
- case LROTATE_EXPR:
- case RROTATE_EXPR:
- break;
- default:
- return NULL;
+ /* __builtin_bswap16 (x) is another form of x r>> 8.
+ The vectorizer has bswap support, but only if the argument isn't
+ promoted. */
+ lhs = gimple_call_lhs (last_stmt);
+ oprnd0 = gimple_call_arg (last_stmt, 0);
+ type = TREE_TYPE (oprnd0);
+ if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16
+ || TYPE_PRECISION (type) <= 16
+ || TREE_CODE (oprnd0) != SSA_NAME
+ || BITS_PER_UNIT != 8
+ || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
+ return NULL;
+
+ stmt_vec_info def_stmt_info;
+ if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
+ return NULL;
+
+ if (dt != vect_internal_def)
+ return NULL;
+
+ if (gimple_assign_cast_p (def_stmt))
+ {
+ def = gimple_assign_rhs1 (def_stmt);
+ if (INTEGRAL_TYPE_P (TREE_TYPE (def))
+ && TYPE_PRECISION (TREE_TYPE (def)) == 16)
+ oprnd0 = def;
+ }
+
+ type = TREE_TYPE (lhs);
+ vectype = get_vectype_for_scalar_type (type);
+ if (vectype == NULL_TREE)
+ return NULL;
+
+ if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
+ {
+ /* The encoding uses one stepped pattern for each byte in the
+ 16-bit word. */
+ vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
+ for (unsigned i = 0; i < 3; ++i)
+ for (unsigned j = 0; j < 2; ++j)
+ elts.quick_push ((i + 1) * 2 - j - 1);
+
+ vec_perm_indices indices (elts, 1,
+ TYPE_VECTOR_SUBPARTS (char_vectype));
+ if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
+ {
+ /* vectorizable_bswap can handle the __builtin_bswap16 if we
+ undo the argument promotion. */
+ if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
+ {
+ def = vect_recog_temp_ssa_var (type, NULL);
+ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
+ append_pattern_def_seq (stmt_vinfo, def_stmt);
+ oprnd0 = def;
+ }
+
+ /* Pattern detected. */
+ vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
+
+ *type_out = vectype;
+
+ /* Pattern supported. Create a stmt to be used to replace the
+ pattern, with the unpromoted argument. */
+ var = vect_recog_temp_ssa_var (type, NULL);
+ pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
+ 1, oprnd0);
+ gimple_call_set_lhs (pattern_stmt, var);
+ gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
+ gimple_call_fntype (last_stmt));
+ return pattern_stmt;
+ }
+ }
+
+ oprnd1 = build_int_cst (integer_type_node, 8);
+ rhs_code = LROTATE_EXPR;
+ bswap16_p = true;
}
+ else
+ return NULL;
- lhs = gimple_assign_lhs (last_stmt);
- oprnd0 = gimple_assign_rhs1 (last_stmt);
- type = TREE_TYPE (oprnd0);
- oprnd1 = gimple_assign_rhs2 (last_stmt);
if (TREE_CODE (oprnd0) != SSA_NAME
|| TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
|| !INTEGRAL_TYPE_P (type)
@@ -2210,14 +2295,39 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
if (optab1
&& optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
- return NULL;
+ {
+ use_rotate:
+ if (bswap16_p)
+ {
+ if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
+ {
+ def = vect_recog_temp_ssa_var (type, NULL);
+ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
+ append_pattern_def_seq (stmt_vinfo, def_stmt);
+ oprnd0 = def;
+ }
+
+ /* Pattern detected. */
+ vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
+
+ *type_out = vectype;
+
+ /* Pattern supported. Create a stmt to be used to replace the
+ pattern. */
+ var = vect_recog_temp_ssa_var (type, NULL);
+ pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
+ oprnd1);
+ return pattern_stmt;
+ }
+ return NULL;
+ }
if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
{
optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
if (optab2
&& optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
- return NULL;
+ goto use_rotate;
}
/* If vector/vector or vector/scalar shifts aren't supported by the target,
@@ -2242,6 +2352,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
*type_out = vectype;
+ if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
+ {
+ def = vect_recog_temp_ssa_var (type, NULL);
+ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
+ append_pattern_def_seq (stmt_vinfo, def_stmt);
+ oprnd0 = def;
+ }
+
if (dt == vect_external_def
&& TREE_CODE (oprnd1) == SSA_NAME)
ext_def = vect_get_external_def_edge (vinfo, oprnd1);