author    Jakub Jelinek <jakub@redhat.com>    2018-05-17 11:54:36 +0200
committer Jakub Jelinek <jakub@gcc.gnu.org>  2018-05-17 11:54:36 +0200
commit    28a8a768ebef5e31f950013f1b48b14c008b4b3b (patch)
tree      b55a59fb3a2c1d50c6700d7191faf3008347f9ad /gcc/config/i386
parent    4e6a811fad69dde184dd2900d4809a6cd7b42cf9 (diff)
re PR target/85323 (SSE/AVX/AVX512 shift by 0 not optimized away)
	PR target/85323
	* config/i386/i386.c: Include tree-vector-builder.h.
	(ix86_vector_shift_count): New function.
	(ix86_fold_builtin): Fold shift builtins by scalar count.
	(ix86_gimple_fold_builtin): Likewise.

	* gcc.target/i386/pr85323-1.c: New test.
	* gcc.target/i386/pr85323-2.c: New test.
	* gcc.target/i386/pr85323-3.c: New test.

From-SVN: r260311
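As context for the change, here is a minimal sketch of what the PR is about (not taken from the patch or its testcases; function names are illustrative and SSE2 intrinsics from <emmintrin.h> are assumed). After this commit, both functions below compile without emitting a shift instruction at all:

#include <emmintrin.h>

/* Shift by 0: the builtin now folds to its first argument, so this
   function becomes a plain copy of x.  */
__m128i
shift_by_zero (__m128i x)
{
  return _mm_slli_epi32 (x, 0);
}

/* Logical shift by >= the element precision (32 here): x86 semantics
   yield all-zero elements, so this folds to a zero constant.  */
__m128i
shift_out_of_range (__m128i x)
{
  return _mm_srli_epi32 (x, 32);
}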
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--  gcc/config/i386/i386.c | 340
1 file changed, 340 insertions(+), 0 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0c7a6b7..f802add 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
+#include "tree-vector-builder.h"
/* This file should be included last. */
#include "target-def.h"
@@ -33365,6 +33366,28 @@ fold_builtin_cpu (tree fndecl, tree *args)
gcc_unreachable ();
}
+/* Return the shift count of a vector-by-scalar shift builtin given its
+ second argument ARG1, or NULL_TREE if it cannot be determined. */
+static tree
+ix86_vector_shift_count (tree arg1)
+{
+ if (tree_fits_uhwi_p (arg1))
+ return arg1;
+ else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
+ {
+ /* The count argument is weird: it is passed in as various 128-bit
+ (or 64-bit) vectors, and its low 64 bits hold the count. */
+ unsigned char buf[16];
+ int len = native_encode_expr (arg1, buf, 16);
+ if (len == 0)
+ return NULL_TREE;
+ tree t = native_interpret_expr (uint64_type_node, buf, len);
+ if (t && tree_fits_uhwi_p (t))
+ return t;
+ }
+ return NULL_TREE;
+}
+
static tree
ix86_fold_builtin (tree fndecl, int n_args,
tree *args, bool ignore ATTRIBUTE_UNUSED)
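A note on the helper just added: the source-level intrinsics expose two shapes of count operand, which is why ix86_vector_shift_count accepts both a plain integer and a VECTOR_CST whose low 64 bits it decodes via native_encode_expr/native_interpret_expr. A hedged sketch of the two forms (SSE2 intrinsics assumed; function names are illustrative):

#include <emmintrin.h>

/* Immediate form: the count is an ordinary scalar (PSLLDI-style
   builtin), so tree_fits_uhwi_p sees it directly.  */
__m128i
by_immediate (__m128i x)
{
  return _mm_slli_epi32 (x, 3);
}

/* Vector form: the count travels in the low 64 bits of a 128-bit
   vector (PSLLD-style builtin), the case the helper decodes with
   native_encode_expr.  */
__m128i
by_vector (__m128i x)
{
  __m128i cnt = _mm_cvtsi32_si128 (3);
  return _mm_sll_epi32 (x, cnt);
}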
@@ -33373,6 +33396,8 @@ ix86_fold_builtin (tree fndecl, int n_args,
{
enum ix86_builtins fn_code = (enum ix86_builtins)
DECL_FUNCTION_CODE (fndecl);
+ enum rtx_code rcode;
+
switch (fn_code)
{
case IX86_BUILTIN_CPU_IS:
@@ -33553,6 +33578,168 @@ ix86_fold_builtin (tree fndecl, int n_args,
}
break;
+ case IX86_BUILTIN_PSLLD:
+ case IX86_BUILTIN_PSLLD128:
+ case IX86_BUILTIN_PSLLD128_MASK:
+ case IX86_BUILTIN_PSLLD256:
+ case IX86_BUILTIN_PSLLD256_MASK:
+ case IX86_BUILTIN_PSLLD512:
+ case IX86_BUILTIN_PSLLDI:
+ case IX86_BUILTIN_PSLLDI128:
+ case IX86_BUILTIN_PSLLDI128_MASK:
+ case IX86_BUILTIN_PSLLDI256:
+ case IX86_BUILTIN_PSLLDI256_MASK:
+ case IX86_BUILTIN_PSLLDI512:
+ case IX86_BUILTIN_PSLLQ:
+ case IX86_BUILTIN_PSLLQ128:
+ case IX86_BUILTIN_PSLLQ128_MASK:
+ case IX86_BUILTIN_PSLLQ256:
+ case IX86_BUILTIN_PSLLQ256_MASK:
+ case IX86_BUILTIN_PSLLQ512:
+ case IX86_BUILTIN_PSLLQI:
+ case IX86_BUILTIN_PSLLQI128:
+ case IX86_BUILTIN_PSLLQI128_MASK:
+ case IX86_BUILTIN_PSLLQI256:
+ case IX86_BUILTIN_PSLLQI256_MASK:
+ case IX86_BUILTIN_PSLLQI512:
+ case IX86_BUILTIN_PSLLW:
+ case IX86_BUILTIN_PSLLW128:
+ case IX86_BUILTIN_PSLLW128_MASK:
+ case IX86_BUILTIN_PSLLW256:
+ case IX86_BUILTIN_PSLLW256_MASK:
+ case IX86_BUILTIN_PSLLW512_MASK:
+ case IX86_BUILTIN_PSLLWI:
+ case IX86_BUILTIN_PSLLWI128:
+ case IX86_BUILTIN_PSLLWI128_MASK:
+ case IX86_BUILTIN_PSLLWI256:
+ case IX86_BUILTIN_PSLLWI256_MASK:
+ case IX86_BUILTIN_PSLLWI512_MASK:
+ rcode = ASHIFT;
+ goto do_shift;
+ case IX86_BUILTIN_PSRAD:
+ case IX86_BUILTIN_PSRAD128:
+ case IX86_BUILTIN_PSRAD128_MASK:
+ case IX86_BUILTIN_PSRAD256:
+ case IX86_BUILTIN_PSRAD256_MASK:
+ case IX86_BUILTIN_PSRAD512:
+ case IX86_BUILTIN_PSRADI:
+ case IX86_BUILTIN_PSRADI128:
+ case IX86_BUILTIN_PSRADI128_MASK:
+ case IX86_BUILTIN_PSRADI256:
+ case IX86_BUILTIN_PSRADI256_MASK:
+ case IX86_BUILTIN_PSRADI512:
+ case IX86_BUILTIN_PSRAQ128_MASK:
+ case IX86_BUILTIN_PSRAQ256_MASK:
+ case IX86_BUILTIN_PSRAQ512:
+ case IX86_BUILTIN_PSRAQI128_MASK:
+ case IX86_BUILTIN_PSRAQI256_MASK:
+ case IX86_BUILTIN_PSRAQI512:
+ case IX86_BUILTIN_PSRAW:
+ case IX86_BUILTIN_PSRAW128:
+ case IX86_BUILTIN_PSRAW128_MASK:
+ case IX86_BUILTIN_PSRAW256:
+ case IX86_BUILTIN_PSRAW256_MASK:
+ case IX86_BUILTIN_PSRAW512:
+ case IX86_BUILTIN_PSRAWI:
+ case IX86_BUILTIN_PSRAWI128:
+ case IX86_BUILTIN_PSRAWI128_MASK:
+ case IX86_BUILTIN_PSRAWI256:
+ case IX86_BUILTIN_PSRAWI256_MASK:
+ case IX86_BUILTIN_PSRAWI512:
+ rcode = ASHIFTRT;
+ goto do_shift;
+ case IX86_BUILTIN_PSRLD:
+ case IX86_BUILTIN_PSRLD128:
+ case IX86_BUILTIN_PSRLD128_MASK:
+ case IX86_BUILTIN_PSRLD256:
+ case IX86_BUILTIN_PSRLD256_MASK:
+ case IX86_BUILTIN_PSRLD512:
+ case IX86_BUILTIN_PSRLDI:
+ case IX86_BUILTIN_PSRLDI128:
+ case IX86_BUILTIN_PSRLDI128_MASK:
+ case IX86_BUILTIN_PSRLDI256:
+ case IX86_BUILTIN_PSRLDI256_MASK:
+ case IX86_BUILTIN_PSRLDI512:
+ case IX86_BUILTIN_PSRLQ:
+ case IX86_BUILTIN_PSRLQ128:
+ case IX86_BUILTIN_PSRLQ128_MASK:
+ case IX86_BUILTIN_PSRLQ256:
+ case IX86_BUILTIN_PSRLQ256_MASK:
+ case IX86_BUILTIN_PSRLQ512:
+ case IX86_BUILTIN_PSRLQI:
+ case IX86_BUILTIN_PSRLQI128:
+ case IX86_BUILTIN_PSRLQI128_MASK:
+ case IX86_BUILTIN_PSRLQI256:
+ case IX86_BUILTIN_PSRLQI256_MASK:
+ case IX86_BUILTIN_PSRLQI512:
+ case IX86_BUILTIN_PSRLW:
+ case IX86_BUILTIN_PSRLW128:
+ case IX86_BUILTIN_PSRLW128_MASK:
+ case IX86_BUILTIN_PSRLW256:
+ case IX86_BUILTIN_PSRLW256_MASK:
+ case IX86_BUILTIN_PSRLW512:
+ case IX86_BUILTIN_PSRLWI:
+ case IX86_BUILTIN_PSRLWI128:
+ case IX86_BUILTIN_PSRLWI128_MASK:
+ case IX86_BUILTIN_PSRLWI256:
+ case IX86_BUILTIN_PSRLWI256_MASK:
+ case IX86_BUILTIN_PSRLWI512:
+ rcode = LSHIFTRT;
+ goto do_shift;
+
+ do_shift:
+ gcc_assert (n_args >= 2);
+ if (TREE_CODE (args[0]) != VECTOR_CST)
+ break;
+ if (n_args > 2)
+ {
+ /* This is a masked shift.  Only optimize if the mask is all
+ ones. */
+ if (!tree_fits_uhwi_p (args[n_args - 1])
+ || TREE_SIDE_EFFECTS (args[n_args - 2]))
+ break;
+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[n_args - 1]);
+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
+ if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
+ break;
+ }
+ if (tree tem = ix86_vector_shift_count (args[1]))
+ {
+ unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
+ if (count == 0)
+ return args[0];
+ if (count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))))
+ {
+ if (rcode != ASHIFTRT)
+ return build_zero_cst (TREE_TYPE (args[0]));
+ count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))) - 1;
+ }
+ tree countt = build_int_cst (integer_type_node, count);
+ tree_vector_builder builder;
+ builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
+ false);
+ unsigned int cnt = builder.encoded_nelts ();
+ for (unsigned int i = 0; i < cnt; ++i)
+ {
+ tree elt = VECTOR_CST_ELT (args[0], i);
+ if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
+ return NULL_TREE;
+ tree type = TREE_TYPE (elt);
+ if (rcode == LSHIFTRT)
+ elt = fold_convert (unsigned_type_for (type), elt);
+ elt = const_binop (rcode == ASHIFT
+ ? LSHIFT_EXPR : RSHIFT_EXPR, TREE_TYPE (elt),
+ elt, countt);
+ if (!elt || TREE_CODE (elt) != INTEGER_CST)
+ return NULL_TREE;
+ if (rcode == LSHIFTRT)
+ elt = fold_convert (type, elt);
+ builder.quick_push (elt);
+ }
+ return builder.build ();
+ }
+ break;
+
default:
break;
}
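The masked-builtin guard in the fold above only fires when the mask covers every element of the vector; a sketch of the distinction, assuming AVX-512VL intrinsics from <immintrin.h> (compile with -mavx512vl; function names are illustrative):

#include <immintrin.h>

/* Mask 0xff is all ones for the 8 32-bit elements of a __m256i, so the
   shift by 0 can fold straight to x.  */
__m256i
masked_all_ones (__m256i src, __m256i x)
{
  return _mm256_mask_slli_epi32 (src, 0xff, x, 0);
}

/* A partial mask still needs the blend with SRC, so the fold
   deliberately bails out (the `break' after the mask check).  */
__m256i
masked_partial (__m256i src, __m256i x)
{
  return _mm256_mask_slli_epi32 (src, 0x0f, x, 0);
}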
@@ -33578,6 +33765,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
tree decl = NULL_TREE;
tree arg0, arg1;
+ enum rtx_code rcode;
+ unsigned HOST_WIDE_INT count;
switch (fn_code)
{
@@ -33657,6 +33846,157 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
}
break;
+ case IX86_BUILTIN_PSLLD:
+ case IX86_BUILTIN_PSLLD128:
+ case IX86_BUILTIN_PSLLD128_MASK:
+ case IX86_BUILTIN_PSLLD256:
+ case IX86_BUILTIN_PSLLD256_MASK:
+ case IX86_BUILTIN_PSLLD512:
+ case IX86_BUILTIN_PSLLDI:
+ case IX86_BUILTIN_PSLLDI128:
+ case IX86_BUILTIN_PSLLDI128_MASK:
+ case IX86_BUILTIN_PSLLDI256:
+ case IX86_BUILTIN_PSLLDI256_MASK:
+ case IX86_BUILTIN_PSLLDI512:
+ case IX86_BUILTIN_PSLLQ:
+ case IX86_BUILTIN_PSLLQ128:
+ case IX86_BUILTIN_PSLLQ128_MASK:
+ case IX86_BUILTIN_PSLLQ256:
+ case IX86_BUILTIN_PSLLQ256_MASK:
+ case IX86_BUILTIN_PSLLQ512:
+ case IX86_BUILTIN_PSLLQI:
+ case IX86_BUILTIN_PSLLQI128:
+ case IX86_BUILTIN_PSLLQI128_MASK:
+ case IX86_BUILTIN_PSLLQI256:
+ case IX86_BUILTIN_PSLLQI256_MASK:
+ case IX86_BUILTIN_PSLLQI512:
+ case IX86_BUILTIN_PSLLW:
+ case IX86_BUILTIN_PSLLW128:
+ case IX86_BUILTIN_PSLLW128_MASK:
+ case IX86_BUILTIN_PSLLW256:
+ case IX86_BUILTIN_PSLLW256_MASK:
+ case IX86_BUILTIN_PSLLW512_MASK:
+ case IX86_BUILTIN_PSLLWI:
+ case IX86_BUILTIN_PSLLWI128:
+ case IX86_BUILTIN_PSLLWI128_MASK:
+ case IX86_BUILTIN_PSLLWI256:
+ case IX86_BUILTIN_PSLLWI256_MASK:
+ case IX86_BUILTIN_PSLLWI512_MASK:
+ rcode = ASHIFT;
+ goto do_shift;
+ case IX86_BUILTIN_PSRAD:
+ case IX86_BUILTIN_PSRAD128:
+ case IX86_BUILTIN_PSRAD128_MASK:
+ case IX86_BUILTIN_PSRAD256:
+ case IX86_BUILTIN_PSRAD256_MASK:
+ case IX86_BUILTIN_PSRAD512:
+ case IX86_BUILTIN_PSRADI:
+ case IX86_BUILTIN_PSRADI128:
+ case IX86_BUILTIN_PSRADI128_MASK:
+ case IX86_BUILTIN_PSRADI256:
+ case IX86_BUILTIN_PSRADI256_MASK:
+ case IX86_BUILTIN_PSRADI512:
+ case IX86_BUILTIN_PSRAQ128_MASK:
+ case IX86_BUILTIN_PSRAQ256_MASK:
+ case IX86_BUILTIN_PSRAQ512:
+ case IX86_BUILTIN_PSRAQI128_MASK:
+ case IX86_BUILTIN_PSRAQI256_MASK:
+ case IX86_BUILTIN_PSRAQI512:
+ case IX86_BUILTIN_PSRAW:
+ case IX86_BUILTIN_PSRAW128:
+ case IX86_BUILTIN_PSRAW128_MASK:
+ case IX86_BUILTIN_PSRAW256:
+ case IX86_BUILTIN_PSRAW256_MASK:
+ case IX86_BUILTIN_PSRAW512:
+ case IX86_BUILTIN_PSRAWI:
+ case IX86_BUILTIN_PSRAWI128:
+ case IX86_BUILTIN_PSRAWI128_MASK:
+ case IX86_BUILTIN_PSRAWI256:
+ case IX86_BUILTIN_PSRAWI256_MASK:
+ case IX86_BUILTIN_PSRAWI512:
+ rcode = ASHIFTRT;
+ goto do_shift;
+ case IX86_BUILTIN_PSRLD:
+ case IX86_BUILTIN_PSRLD128:
+ case IX86_BUILTIN_PSRLD128_MASK:
+ case IX86_BUILTIN_PSRLD256:
+ case IX86_BUILTIN_PSRLD256_MASK:
+ case IX86_BUILTIN_PSRLD512:
+ case IX86_BUILTIN_PSRLDI:
+ case IX86_BUILTIN_PSRLDI128:
+ case IX86_BUILTIN_PSRLDI128_MASK:
+ case IX86_BUILTIN_PSRLDI256:
+ case IX86_BUILTIN_PSRLDI256_MASK:
+ case IX86_BUILTIN_PSRLDI512:
+ case IX86_BUILTIN_PSRLQ:
+ case IX86_BUILTIN_PSRLQ128:
+ case IX86_BUILTIN_PSRLQ128_MASK:
+ case IX86_BUILTIN_PSRLQ256:
+ case IX86_BUILTIN_PSRLQ256_MASK:
+ case IX86_BUILTIN_PSRLQ512:
+ case IX86_BUILTIN_PSRLQI:
+ case IX86_BUILTIN_PSRLQI128:
+ case IX86_BUILTIN_PSRLQI128_MASK:
+ case IX86_BUILTIN_PSRLQI256:
+ case IX86_BUILTIN_PSRLQI256_MASK:
+ case IX86_BUILTIN_PSRLQI512:
+ case IX86_BUILTIN_PSRLW:
+ case IX86_BUILTIN_PSRLW128:
+ case IX86_BUILTIN_PSRLW128_MASK:
+ case IX86_BUILTIN_PSRLW256:
+ case IX86_BUILTIN_PSRLW256_MASK:
+ case IX86_BUILTIN_PSRLW512:
+ case IX86_BUILTIN_PSRLWI:
+ case IX86_BUILTIN_PSRLWI128:
+ case IX86_BUILTIN_PSRLWI128_MASK:
+ case IX86_BUILTIN_PSRLWI256:
+ case IX86_BUILTIN_PSRLWI256_MASK:
+ case IX86_BUILTIN_PSRLWI512:
+ rcode = LSHIFTRT;
+ goto do_shift;
+
+ do_shift:
+ gcc_assert (n_args >= 2);
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ if (n_args > 2)
+ {
+ /* This is a masked shift.  Only optimize if the mask is all ones. */
+ tree argl = gimple_call_arg (stmt, n_args - 1);
+ if (!tree_fits_uhwi_p (argl))
+ break;
+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+ if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
+ break;
+ }
+ arg1 = ix86_vector_shift_count (arg1);
+ if (!arg1)
+ break;
+ count = tree_to_uhwi (arg1);
+ if (count == 0)
+ {
+ /* Just return the first argument for shift by 0. */
+ location_t loc = gimple_location (stmt);
+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, false);
+ return true;
+ }
+ if (rcode != ASHIFTRT
+ && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
+ {
+ /* For shift counts greater than or equal to the element precision,
+ the result is zero, except for arithmetic right shifts. */
+ location_t loc = gimple_location (stmt);
+ gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
+ build_zero_cst (TREE_TYPE (arg0)));
+ gimple_set_location (g, loc);
+ gsi_replace (gsi, g, false);
+ return true;
+ }
+ break;
+
default:
break;
}
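To make the GIMPLE-level effect concrete, a hedged sketch (not from the patch's testsuite; SSE2 intrinsics assumed, and it presumes the vector count has become a VECTOR_CST by the time the folder runs): the first function below is rewritten into a plain assignment of its argument, the second into an assignment of a zero vector, so neither emits a shift.

#include <emmintrin.h>

/* Shift by a zero count held in a vector: replaced by `lhs = x'.  */
__m128i
copy_through_shift (__m128i x)
{
  return _mm_sll_epi32 (x, _mm_setzero_si128 ());
}

/* Logical shift by the full 64-bit element precision: replaced by
   `lhs = { 0, 0 }'.  */
__m128i
zero_through_shift (__m128i x)
{
  return _mm_srli_epi64 (x, 64);
}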