about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@gcc.gnu.org> 2017-11-30 11:29:58 +0100
committer Jakub Jelinek <jakub@gcc.gnu.org> 2017-11-30 11:29:58 +0100
commit 89b1427f8693699d64050cc4daf5626f6b96b96a (patch)
tree 6eb12921ec5d8b6fd580bf01c82cbdc59ac2966c /gcc
parent 7c080ade9d8198958a1a37854d5cc56f7b76b9f4 (diff)
download gcc-89b1427f8693699d64050cc4daf5626f6b96b96a.zip
gcc-89b1427f8693699d64050cc4daf5626f6b96b96a.tar.gz
gcc-89b1427f8693699d64050cc4daf5626f6b96b96a.tar.bz2
re PR target/83210 (__builtin_mul_overflow() generates suboptimal code when exactly one argument is the constant 2)
PR target/83210
* internal-fn.c (expand_mul_overflow): Optimize unsigned
multiplication by power of 2 constant into two shifts + comparison.

* gcc.target/i386/pr83210.c: New test.

From-SVN: r255269
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 8
-rw-r--r-- gcc/internal-fn.c 43
-rw-r--r-- gcc/testsuite/ChangeLog 5
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr83210.c 53
4 files changed, 108 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 424c7e7..776508a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,7 +1,13 @@
+2017-11-30 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/83210
+ * internal-fn.c (expand_mul_overflow): Optimize unsigned
+ multiplication by power of 2 constant into two shifts + comparison.
+
2017-11-30 Jan Hubicka <hubicka@ucw.cz>
PR target/81616
- * x86-tnue-costs.h (generic_cost): Revise for modern CPUs
+ * config/i386/x86-tune-costs.h (generic_cost): Revise for modern CPUs.
2017-11-30 Richard Biener <rguenther@suse.de>
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 119fa1a..7ddc524 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -1462,6 +1462,49 @@ expand_mul_overflow (location_t loc, tree lhs, tree arg0, tree arg1,
type = build_nonstandard_integer_type (GET_MODE_PRECISION (mode), uns);
sign = uns ? UNSIGNED : SIGNED;
icode = optab_handler (uns ? umulv4_optab : mulv4_optab, mode);
+ if (uns
+ && (integer_pow2p (arg0) || integer_pow2p (arg1))
+ && (optimize_insn_for_speed_p () || icode == CODE_FOR_nothing))
+ {
+ /* Optimize unsigned multiplication by power of 2 constant
+ using 2 shifts, one for result, one to extract the shifted
+ out bits to see if they are all zero.
+ Don't do this if optimizing for size and we have umulv4_optab,
+ in that case assume multiplication will be shorter.
+ This is heuristics based on the single target that provides
+ umulv4 right now (i?86/x86_64), if further targets add it, this
+ might need to be revisited.
+ Cases where both operands are constant should be folded already
+ during GIMPLE, and cases where one operand is constant but not
+ power of 2 are questionable, either the WIDEN_MULT_EXPR case
+ below can be done without multiplication, just by shifts and adds,
+ or we'd need to divide the result (and hope it actually doesn't
+ really divide nor multiply) and compare the result of the division
+ with the original operand. */
+ rtx opn0 = op0;
+ rtx opn1 = op1;
+ tree argn0 = arg0;
+ tree argn1 = arg1;
+ if (integer_pow2p (arg0))
+ {
+ std::swap (opn0, opn1);
+ std::swap (argn0, argn1);
+ }
+ int cnt = tree_log2 (argn1);
+ if (cnt >= 0 && cnt < GET_MODE_PRECISION (mode))
+ {
+ rtx upper = const0_rtx;
+ res = expand_shift (LSHIFT_EXPR, mode, opn0, cnt, NULL_RTX, uns);
+ if (cnt != 0)
+ upper = expand_shift (RSHIFT_EXPR, mode, opn0,
+ GET_MODE_PRECISION (mode) - cnt,
+ NULL_RTX, uns);
+ do_compare_rtx_and_jump (upper, const0_rtx, EQ, true, mode,
+ NULL_RTX, NULL, done_label,
+ profile_probability::very_likely ());
+ goto do_error_label;
+ }
+ }
if (icode != CODE_FOR_nothing)
{
struct expand_operand ops[4];
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a7f3c6f..bda1bf5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2017-11-30 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/83210
+ * gcc.target/i386/pr83210.c: New test.
+
2017-11-30 Jan Hubicka <hubicka@ucw.cz>
PR target/81616
diff --git a/gcc/testsuite/gcc.target/i386/pr83210.c b/gcc/testsuite/gcc.target/i386/pr83210.c
new file mode 100644
index 0000000..cf985d2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr83210.c
@@ -0,0 +1,53 @@
+/* PR target/83210 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not {\mmul[lq]\M} } } */
+
+void bar (void);
+
+unsigned
+f1 (unsigned int x)
+{
+ unsigned res;
+ if (__builtin_mul_overflow (x, 2, &res))
+ bar ();
+ return res;
+}
+
+unsigned long
+f2 (unsigned long x)
+{
+ unsigned long res;
+ if (__builtin_mul_overflow (16, x, &res))
+ bar ();
+ return res;
+}
+
+unsigned long long
+f3 (unsigned long long x)
+{
+ unsigned long long res;
+ if (__builtin_mul_overflow (x, (1ULL << (__SIZEOF_LONG_LONG__ * __CHAR_BIT__ - 1)), &res))
+ bar ();
+ return res;
+}
+
+#ifdef __SIZEOF_INT128__
+unsigned __int128
+f4 (unsigned __int128 x)
+{
+ unsigned __int128 res;
+ if (__builtin_mul_overflow (x, (((unsigned __int128) 1) << (__SIZEOF_INT128__ * __CHAR_BIT__ / 2)), &res))
+ bar ();
+ return res;
+}
+
+unsigned __int128
+f5 (unsigned __int128 x)
+{
+ unsigned __int128 res;
+ if (__builtin_mul_overflow (x, (((unsigned __int128) 1) << (__SIZEOF_INT128__ * __CHAR_BIT__ / 2 + 3)), &res))
+ bar ();
+ return res;
+}
+#endif