author     Jakub Jelinek <jakub@redhat.com>   2018-05-17 11:54:36 +0200
committer  Jakub Jelinek <jakub@gcc.gnu.org>  2018-05-17 11:54:36 +0200
commit     28a8a768ebef5e31f950013f1b48b14c008b4b3b (patch)
tree       b55a59fb3a2c1d50c6700d7191faf3008347f9ad /gcc/config/i386
parent     4e6a811fad69dde184dd2900d4809a6cd7b42cf9 (diff)
re PR target/85323 (SSE/AVX/AVX512 shift by 0 not optimized away)
PR target/85323
* config/i386/i386.c: Include tree-vector-builder.h.
(ix86_vector_shift_count): New function.
(ix86_fold_builtin): Fold shift builtins by scalar count.
(ix86_gimple_fold_builtin): Likewise.
* gcc.target/i386/pr85323-1.c: New test.
* gcc.target/i386/pr85323-2.c: New test.
* gcc.target/i386/pr85323-3.c: New test.
From-SVN: r260311
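
What this buys at the source level, as a minimal sketch (hypothetical code in
the spirit of the new pr85323-*.c tests, not copied from them): shifts by a
literal 0 now fold to the unshifted operand, and logical shifts by a count at
or above the element precision fold to zero. Arithmetic right shifts are the
exception, since the hardware treats oversized counts as precision - 1.

    #include <immintrin.h>

    __m128i
    shift_by_zero (__m128i x)
    {
      /* After the patch this folds to plain "x"; before, GCC emitted
         a pslld with a zero count.  */
      return _mm_slli_epi32 (x, 0);
    }

    __m128i
    shift_out_of_range (__m128i x)
    {
      /* A logical shift by >= 32 on 32-bit lanes folds to all-zeros.  */
      return _mm_srli_epi32 (x, 32);
    }

    __m128i
    arith_shift_is_special (__m128i x)
    {
      /* An arithmetic right shift by >= 32 acts like a shift by 31
         (each lane becomes 0 or -1), so it is not folded to zero;
         constant inputs are folded with the count clamped to 31.  */
      return _mm_srai_epi32 (x, 32);
    }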
Diffstat (limited to 'gcc/config/i386')
-rw-r--r--   gcc/config/i386/i386.c   340
1 file changed, 340 insertions(+), 0 deletions(-)
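
One subtlety in the patch below: only the immediate-count builtins (the
IX86_BUILTIN_PSLLDI128 and similar "I" variants) take the shift count as a
plain integer. The vector-count forms (IX86_BUILTIN_PSLLD128 etc.) pass it as
a 64-bit or 128-bit vector whose low 64 bits hold the count, which is why the
new ix86_vector_shift_count serializes the VECTOR_CST with native_encode_expr
and reinterprets the leading bytes as a uint64_t. A standalone sketch of that
decoding idea (ordinary C, not GCC internals; assumes the little-endian byte
image an x86 target produces):

    #include <stdint.h>
    #include <stdio.h>

    /* Recover the shift count from the byte image of a count vector:
       only the low 64 bits are meaningful, whatever the lane type.  */
    static uint64_t
    low64_as_count (const unsigned char *bytes)
    {
      uint64_t count = 0;
      for (int i = 0; i < 8; i++)
        count |= (uint64_t) bytes[i] << (8 * i);  /* little-endian image */
      return count;
    }

    int
    main (void)
    {
      unsigned char v[16] = { 3 };  /* image of the v2di vector { 3, 0 } */
      printf ("count = %llu\n", (unsigned long long) low64_as_count (v));
      return 0;
    }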
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0c7a6b7..f802add 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-prop.h"
 #include "ipa-fnsummary.h"
 #include "wide-int-bitmask.h"
+#include "tree-vector-builder.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -33365,6 +33366,28 @@ fold_builtin_cpu (tree fndecl, tree *args)
   gcc_unreachable ();
 }
 
+/* Return the shift count of a vector by scalar shift builtin second argument
+   ARG1.  */
+static tree
+ix86_vector_shift_count (tree arg1)
+{
+  if (tree_fits_uhwi_p (arg1))
+    return arg1;
+  else if (TREE_CODE (arg1) == VECTOR_CST && CHAR_BIT == 8)
+    {
+      /* The count argument is weird, passed in as various 128-bit
+         (or 64-bit) vectors, the low 64 bits from it are the count.  */
+      unsigned char buf[16];
+      int len = native_encode_expr (arg1, buf, 16);
+      if (len == 0)
+        return NULL_TREE;
+      tree t = native_interpret_expr (uint64_type_node, buf, len);
+      if (t && tree_fits_uhwi_p (t))
+        return t;
+    }
+  return NULL_TREE;
+}
+
 static tree
 ix86_fold_builtin (tree fndecl, int n_args,
                    tree *args, bool ignore ATTRIBUTE_UNUSED)
@@ -33373,6 +33396,8 @@ ix86_fold_builtin (tree fndecl, int n_args,
     {
       enum ix86_builtins fn_code = (enum ix86_builtins)
                                    DECL_FUNCTION_CODE (fndecl);
+      enum rtx_code rcode;
+
       switch (fn_code)
         {
         case IX86_BUILTIN_CPU_IS:
@@ -33553,6 +33578,168 @@ ix86_fold_builtin (tree fndecl, int n_args,
             }
           break;
 
+        case IX86_BUILTIN_PSLLD:
+        case IX86_BUILTIN_PSLLD128:
+        case IX86_BUILTIN_PSLLD128_MASK:
+        case IX86_BUILTIN_PSLLD256:
+        case IX86_BUILTIN_PSLLD256_MASK:
+        case IX86_BUILTIN_PSLLD512:
+        case IX86_BUILTIN_PSLLDI:
+        case IX86_BUILTIN_PSLLDI128:
+        case IX86_BUILTIN_PSLLDI128_MASK:
+        case IX86_BUILTIN_PSLLDI256:
+        case IX86_BUILTIN_PSLLDI256_MASK:
+        case IX86_BUILTIN_PSLLDI512:
+        case IX86_BUILTIN_PSLLQ:
+        case IX86_BUILTIN_PSLLQ128:
+        case IX86_BUILTIN_PSLLQ128_MASK:
+        case IX86_BUILTIN_PSLLQ256:
+        case IX86_BUILTIN_PSLLQ256_MASK:
+        case IX86_BUILTIN_PSLLQ512:
+        case IX86_BUILTIN_PSLLQI:
+        case IX86_BUILTIN_PSLLQI128:
+        case IX86_BUILTIN_PSLLQI128_MASK:
+        case IX86_BUILTIN_PSLLQI256:
+        case IX86_BUILTIN_PSLLQI256_MASK:
+        case IX86_BUILTIN_PSLLQI512:
+        case IX86_BUILTIN_PSLLW:
+        case IX86_BUILTIN_PSLLW128:
+        case IX86_BUILTIN_PSLLW128_MASK:
+        case IX86_BUILTIN_PSLLW256:
+        case IX86_BUILTIN_PSLLW256_MASK:
+        case IX86_BUILTIN_PSLLW512_MASK:
+        case IX86_BUILTIN_PSLLWI:
+        case IX86_BUILTIN_PSLLWI128:
+        case IX86_BUILTIN_PSLLWI128_MASK:
+        case IX86_BUILTIN_PSLLWI256:
+        case IX86_BUILTIN_PSLLWI256_MASK:
+        case IX86_BUILTIN_PSLLWI512_MASK:
+          rcode = ASHIFT;
+          goto do_shift;
+        case IX86_BUILTIN_PSRAD:
+        case IX86_BUILTIN_PSRAD128:
+        case IX86_BUILTIN_PSRAD128_MASK:
+        case IX86_BUILTIN_PSRAD256:
+        case IX86_BUILTIN_PSRAD256_MASK:
+        case IX86_BUILTIN_PSRAD512:
+        case IX86_BUILTIN_PSRADI:
+        case IX86_BUILTIN_PSRADI128:
+        case IX86_BUILTIN_PSRADI128_MASK:
+        case IX86_BUILTIN_PSRADI256:
+        case IX86_BUILTIN_PSRADI256_MASK:
+        case IX86_BUILTIN_PSRADI512:
+        case IX86_BUILTIN_PSRAQ128_MASK:
+        case IX86_BUILTIN_PSRAQ256_MASK:
+        case IX86_BUILTIN_PSRAQ512:
+        case IX86_BUILTIN_PSRAQI128_MASK:
+        case IX86_BUILTIN_PSRAQI256_MASK:
+        case IX86_BUILTIN_PSRAQI512:
+        case IX86_BUILTIN_PSRAW:
+        case IX86_BUILTIN_PSRAW128:
+        case IX86_BUILTIN_PSRAW128_MASK:
+        case IX86_BUILTIN_PSRAW256:
+        case IX86_BUILTIN_PSRAW256_MASK:
+        case IX86_BUILTIN_PSRAW512:
+        case IX86_BUILTIN_PSRAWI:
+        case IX86_BUILTIN_PSRAWI128:
+        case IX86_BUILTIN_PSRAWI128_MASK:
+        case IX86_BUILTIN_PSRAWI256:
+        case IX86_BUILTIN_PSRAWI256_MASK:
+        case IX86_BUILTIN_PSRAWI512:
+          rcode = ASHIFTRT;
+          goto do_shift;
+        case IX86_BUILTIN_PSRLD:
+        case IX86_BUILTIN_PSRLD128:
+        case IX86_BUILTIN_PSRLD128_MASK:
+        case IX86_BUILTIN_PSRLD256:
+        case IX86_BUILTIN_PSRLD256_MASK:
+        case IX86_BUILTIN_PSRLD512:
+        case IX86_BUILTIN_PSRLDI:
+        case IX86_BUILTIN_PSRLDI128:
+        case IX86_BUILTIN_PSRLDI128_MASK:
+        case IX86_BUILTIN_PSRLDI256:
+        case IX86_BUILTIN_PSRLDI256_MASK:
+        case IX86_BUILTIN_PSRLDI512:
+        case IX86_BUILTIN_PSRLQ:
+        case IX86_BUILTIN_PSRLQ128:
+        case IX86_BUILTIN_PSRLQ128_MASK:
+        case IX86_BUILTIN_PSRLQ256:
+        case IX86_BUILTIN_PSRLQ256_MASK:
+        case IX86_BUILTIN_PSRLQ512:
+        case IX86_BUILTIN_PSRLQI:
+        case IX86_BUILTIN_PSRLQI128:
+        case IX86_BUILTIN_PSRLQI128_MASK:
+        case IX86_BUILTIN_PSRLQI256:
+        case IX86_BUILTIN_PSRLQI256_MASK:
+        case IX86_BUILTIN_PSRLQI512:
+        case IX86_BUILTIN_PSRLW:
+        case IX86_BUILTIN_PSRLW128:
+        case IX86_BUILTIN_PSRLW128_MASK:
+        case IX86_BUILTIN_PSRLW256:
+        case IX86_BUILTIN_PSRLW256_MASK:
+        case IX86_BUILTIN_PSRLW512:
+        case IX86_BUILTIN_PSRLWI:
+        case IX86_BUILTIN_PSRLWI128:
+        case IX86_BUILTIN_PSRLWI128_MASK:
+        case IX86_BUILTIN_PSRLWI256:
+        case IX86_BUILTIN_PSRLWI256_MASK:
+        case IX86_BUILTIN_PSRLWI512:
+          rcode = LSHIFTRT;
+          goto do_shift;
+
+        do_shift:
+          gcc_assert (n_args >= 2);
+          if (TREE_CODE (args[0]) != VECTOR_CST)
+            break;
+          if (n_args > 2)
+            {
+              /* This is masked shift.  Only optimize if the mask is all
+                 ones.  */
+              if (!tree_fits_uhwi_p (args[n_args - 1])
+                  || TREE_SIDE_EFFECTS (args[n_args - 2]))
+                break;
+              unsigned HOST_WIDE_INT mask = tree_to_uhwi (args[n_args - 1]);
+              unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
+              if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
+                break;
+            }
+          if (tree tem = ix86_vector_shift_count (args[1]))
+            {
+              unsigned HOST_WIDE_INT count = tree_to_uhwi (tem);
+              if (count == 0)
+                return args[0];
+              if (count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))))
+                {
+                  if (rcode != ASHIFTRT)
+                    return build_zero_cst (TREE_TYPE (args[0]));
+                  count = TYPE_PRECISION (TREE_TYPE (TREE_TYPE (args[0]))) - 1;
+                }
+              tree countt = build_int_cst (integer_type_node, count);
+              tree_vector_builder builder;
+              builder.new_unary_operation (TREE_TYPE (args[0]), args[0],
+                                           false);
+              unsigned int cnt = builder.encoded_nelts ();
+              for (unsigned int i = 0; i < cnt; ++i)
+                {
+                  tree elt = VECTOR_CST_ELT (args[0], i);
+                  if (TREE_CODE (elt) != INTEGER_CST || TREE_OVERFLOW (elt))
+                    return NULL_TREE;
+                  tree type = TREE_TYPE (elt);
+                  if (rcode == LSHIFTRT)
+                    elt = fold_convert (unsigned_type_for (type), elt);
+                  elt = const_binop (rcode == ASHIFT
+                                     ? LSHIFT_EXPR : RSHIFT_EXPR, TREE_TYPE (elt),
+                                     elt, countt);
+                  if (!elt || TREE_CODE (elt) != INTEGER_CST)
+                    return NULL_TREE;
+                  if (rcode == LSHIFTRT)
+                    elt = fold_convert (type, elt);
+                  builder.quick_push (elt);
+                }
+              return builder.build ();
+            }
+          break;
+
         default:
           break;
         }
@@ -33578,6 +33765,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   enum ix86_builtins fn_code = (enum ix86_builtins) DECL_FUNCTION_CODE (fndecl);
   tree decl = NULL_TREE;
   tree arg0, arg1;
+  enum rtx_code rcode;
+  unsigned HOST_WIDE_INT count;
 
   switch (fn_code)
     {
@@ -33657,6 +33846,157 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
         }
       break;
 
+    case IX86_BUILTIN_PSLLD:
+    case IX86_BUILTIN_PSLLD128:
+    case IX86_BUILTIN_PSLLD128_MASK:
+    case IX86_BUILTIN_PSLLD256:
+    case IX86_BUILTIN_PSLLD256_MASK:
+    case IX86_BUILTIN_PSLLD512:
+    case IX86_BUILTIN_PSLLDI:
+    case IX86_BUILTIN_PSLLDI128:
+    case IX86_BUILTIN_PSLLDI128_MASK:
+    case IX86_BUILTIN_PSLLDI256:
+    case IX86_BUILTIN_PSLLDI256_MASK:
+    case IX86_BUILTIN_PSLLDI512:
+    case IX86_BUILTIN_PSLLQ:
+    case IX86_BUILTIN_PSLLQ128:
+    case IX86_BUILTIN_PSLLQ128_MASK:
+    case IX86_BUILTIN_PSLLQ256:
+    case IX86_BUILTIN_PSLLQ256_MASK:
+    case IX86_BUILTIN_PSLLQ512:
+    case IX86_BUILTIN_PSLLQI:
+    case IX86_BUILTIN_PSLLQI128:
+    case IX86_BUILTIN_PSLLQI128_MASK:
+    case IX86_BUILTIN_PSLLQI256:
+    case IX86_BUILTIN_PSLLQI256_MASK:
+    case IX86_BUILTIN_PSLLQI512:
+    case IX86_BUILTIN_PSLLW:
+    case IX86_BUILTIN_PSLLW128:
+    case IX86_BUILTIN_PSLLW128_MASK:
+    case IX86_BUILTIN_PSLLW256:
+    case IX86_BUILTIN_PSLLW256_MASK:
+    case IX86_BUILTIN_PSLLW512_MASK:
+    case IX86_BUILTIN_PSLLWI:
+    case IX86_BUILTIN_PSLLWI128:
+    case IX86_BUILTIN_PSLLWI128_MASK:
+    case IX86_BUILTIN_PSLLWI256:
+    case IX86_BUILTIN_PSLLWI256_MASK:
+    case IX86_BUILTIN_PSLLWI512_MASK:
+      rcode = ASHIFT;
+      goto do_shift;
+    case IX86_BUILTIN_PSRAD:
+    case IX86_BUILTIN_PSRAD128:
+    case IX86_BUILTIN_PSRAD128_MASK:
+    case IX86_BUILTIN_PSRAD256:
+    case IX86_BUILTIN_PSRAD256_MASK:
+    case IX86_BUILTIN_PSRAD512:
+    case IX86_BUILTIN_PSRADI:
+    case IX86_BUILTIN_PSRADI128:
+    case IX86_BUILTIN_PSRADI128_MASK:
+    case IX86_BUILTIN_PSRADI256:
+    case IX86_BUILTIN_PSRADI256_MASK:
+    case IX86_BUILTIN_PSRADI512:
+    case IX86_BUILTIN_PSRAQ128_MASK:
+    case IX86_BUILTIN_PSRAQ256_MASK:
+    case IX86_BUILTIN_PSRAQ512:
+    case IX86_BUILTIN_PSRAQI128_MASK:
+    case IX86_BUILTIN_PSRAQI256_MASK:
+    case IX86_BUILTIN_PSRAQI512:
+    case IX86_BUILTIN_PSRAW:
+    case IX86_BUILTIN_PSRAW128:
+    case IX86_BUILTIN_PSRAW128_MASK:
+    case IX86_BUILTIN_PSRAW256:
+    case IX86_BUILTIN_PSRAW256_MASK:
+    case IX86_BUILTIN_PSRAW512:
+    case IX86_BUILTIN_PSRAWI:
+    case IX86_BUILTIN_PSRAWI128:
+    case IX86_BUILTIN_PSRAWI128_MASK:
+    case IX86_BUILTIN_PSRAWI256:
+    case IX86_BUILTIN_PSRAWI256_MASK:
+    case IX86_BUILTIN_PSRAWI512:
+      rcode = ASHIFTRT;
+      goto do_shift;
+    case IX86_BUILTIN_PSRLD:
+    case IX86_BUILTIN_PSRLD128:
+    case IX86_BUILTIN_PSRLD128_MASK:
+    case IX86_BUILTIN_PSRLD256:
+    case IX86_BUILTIN_PSRLD256_MASK:
+    case IX86_BUILTIN_PSRLD512:
+    case IX86_BUILTIN_PSRLDI:
+    case IX86_BUILTIN_PSRLDI128:
+    case IX86_BUILTIN_PSRLDI128_MASK:
+    case IX86_BUILTIN_PSRLDI256:
+    case IX86_BUILTIN_PSRLDI256_MASK:
+    case IX86_BUILTIN_PSRLDI512:
+    case IX86_BUILTIN_PSRLQ:
+    case IX86_BUILTIN_PSRLQ128:
+    case IX86_BUILTIN_PSRLQ128_MASK:
+    case IX86_BUILTIN_PSRLQ256:
+    case IX86_BUILTIN_PSRLQ256_MASK:
+    case IX86_BUILTIN_PSRLQ512:
+    case IX86_BUILTIN_PSRLQI:
+    case IX86_BUILTIN_PSRLQI128:
+    case IX86_BUILTIN_PSRLQI128_MASK:
+    case IX86_BUILTIN_PSRLQI256:
+    case IX86_BUILTIN_PSRLQI256_MASK:
+    case IX86_BUILTIN_PSRLQI512:
+    case IX86_BUILTIN_PSRLW:
+    case IX86_BUILTIN_PSRLW128:
+    case IX86_BUILTIN_PSRLW128_MASK:
+    case IX86_BUILTIN_PSRLW256:
+    case IX86_BUILTIN_PSRLW256_MASK:
+    case IX86_BUILTIN_PSRLW512:
+    case IX86_BUILTIN_PSRLWI:
+    case IX86_BUILTIN_PSRLWI128:
+    case IX86_BUILTIN_PSRLWI128_MASK:
+    case IX86_BUILTIN_PSRLWI256:
+    case IX86_BUILTIN_PSRLWI256_MASK:
+    case IX86_BUILTIN_PSRLWI512:
+      rcode = LSHIFTRT;
+      goto do_shift;
+
+    do_shift:
+      gcc_assert (n_args >= 2);
+      arg0 = gimple_call_arg (stmt, 0);
+      arg1 = gimple_call_arg (stmt, 1);
+      if (n_args > 2)
+        {
+          /* This is masked shift.  Only optimize if the mask is all ones.  */
+          tree argl = gimple_call_arg (stmt, n_args - 1);
+          if (!tree_fits_uhwi_p (argl))
+            break;
+          unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
+          unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+          if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
+            break;
+        }
+      arg1 = ix86_vector_shift_count (arg1);
+      if (!arg1)
+        break;
+      count = tree_to_uhwi (arg1);
+      if (count == 0)
+        {
+          /* Just return the first argument for shift by 0.  */
+          location_t loc = gimple_location (stmt);
+          gimple *g = gimple_build_assign (gimple_call_lhs (stmt), arg0);
+          gimple_set_location (g, loc);
+          gsi_replace (gsi, g, false);
+          return true;
+        }
+      if (rcode != ASHIFTRT
+          && count >= TYPE_PRECISION (TREE_TYPE (TREE_TYPE (arg0))))
+        {
+          /* For shift counts equal or greater than precision, except for
+             arithmetic right shift the result is zero.  */
+          location_t loc = gimple_location (stmt);
+          gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
+                                           build_zero_cst (TREE_TYPE (arg0)));
+          gimple_set_location (g, loc);
+          gsi_replace (gsi, g, false);
+          return true;
+        }
+      break;
+
     default:
       break;
     }
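
Both do_shift blocks refuse to fold a masked shift unless the write mask
selects every lane; with a partial mask the result also depends on the merge
operand, which the fold would discard. The guard
(mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U fills every bit
above the low elems lane bits and compares against all-ones, so it trips
exactly when some lane bit is clear. A standalone sketch of the predicate
(ordinary C, not GCC code; assumes elems < 64, as it is for these builtins):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* True iff the low ELEMS bits of MASK are all ones, i.e. the
       write mask covers every vector lane; higher bits are ignored.  */
    static bool
    mask_covers_all_lanes (uint64_t mask, unsigned elems)
    {
      return (mask | (~UINT64_C (0) << elems)) == ~UINT64_C (0);
    }

    int
    main (void)
    {
      assert (mask_covers_all_lanes (0xff, 8));   /* all 8 lanes written */
      assert (!mask_covers_all_lanes (0xfe, 8));  /* lane 0 merges: no fold */
      assert (mask_covers_all_lanes (0x1f, 4));   /* stray high bits ignored */
      return 0;
    }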