aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2017-10-12 21:10:34 +0200
committerJakub Jelinek <jakub@gcc.gnu.org>2017-10-12 21:10:34 +0200
commit912a7ec3b1f424ee8c6dce7965bd2f98df174dea (patch)
tree8ce05f2a6a420f4c899eda179a769703b2321d36
parent1baafc8d197217a11085f48a93ccd23dc59d2edb (diff)
downloadgcc-912a7ec3b1f424ee8c6dce7965bd2f98df174dea.zip
gcc-912a7ec3b1f424ee8c6dce7965bd2f98df174dea.tar.gz
gcc-912a7ec3b1f424ee8c6dce7965bd2f98df174dea.tar.bz2
re PR target/82498 (Missed optimization for x86 rotate instruction)
PR target/82498 * config/i386/i386.md (*ashl<mode>3_mask_1, *<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1, *<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split patterns. * gcc.target/i386/pr82498-1.c: New test. * gcc.target/i386/pr82498-2.c: New test. From-SVN: r253695
-rw-r--r--gcc/ChangeLog8
-rw-r--r--gcc/config/i386/i386.md108
-rw-r--r--gcc/testsuite/ChangeLog6
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82498-1.c52
-rw-r--r--gcc/testsuite/gcc.target/i386/pr82498-2.c46
5 files changed, 220 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 92cdc5f..a9dbfce 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82498
+ * config/i386/i386.md (*ashl<mode>3_mask_1,
+ *<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
+ *<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
+ patterns.
+
2017-10-12 Jan Hubicka <hubicka@ucw.cz>
* profile-count.h (safe_scale_64bit): Fix GCC4.x path.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 9e1f85f..2fa982c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -10228,6 +10228,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*ashl<mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (ashift:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (ashift:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn "*bmi2_ashl<mode>3_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
@@ -10728,6 +10748,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*<shift_insn><mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (any_shiftrt:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_shiftrt:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn_and_split "*<shift_insn><mode>3_doubleword"
[(set (match_operand:DWI 0 "register_operand" "=&r")
(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
@@ -11187,6 +11227,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (any_rotate:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_rotate:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
;; Implement rotation using two double-precision
;; shift instructions and a scratch register.
@@ -11494,6 +11554,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (QImode, operands[1]);")
+(define_insn_and_split "*<btsc><mode>_mask_1"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (any_or:SWI48
+ (ashift:SWI48
+ (const_int 1)
+ (and:QI
+ (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (match_operand:SWI48 3 "register_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT
+ && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_or:SWI48
+ (ashift:SWI48 (const_int 1)
+ (match_dup 1))
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn "*btr<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(and:SWI48
@@ -11535,6 +11619,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (QImode, operands[1]);")
+(define_insn_and_split "*btr<mode>_mask_1"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (and:SWI48
+ (rotate:SWI48
+ (const_int -2)
+ (and:QI
+ (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (match_operand:SWI48 3 "register_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT
+ && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:SWI48
+ (rotate:SWI48 (const_int -2)
+ (match_dup 1))
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])])
+
;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point. But in 64-bit, we can't hold the
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c804364..9e77fa6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82498
+ * gcc.target/i386/pr82498-1.c: New test.
+ * gcc.target/i386/pr82498-2.c: New test.
+
2017-10-12 Jan Hubicka <hubicka@ucw.cz>
* gcc.dg/predict-13.c: Update template for probaility change.
diff --git a/gcc/testsuite/gcc.target/i386/pr82498-1.c b/gcc/testsuite/gcc.target/i386/pr82498-1.c
new file mode 100644
index 0000000..78a6698
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82498-1.c
@@ -0,0 +1,52 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+unsigned
+f1 (unsigned x, unsigned char y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f3 (unsigned x, unsigned short y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, unsigned int y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f6 (unsigned x, unsigned short y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr82498-2.c b/gcc/testsuite/gcc.target/i386/pr82498-2.c
new file mode 100644
index 0000000..9e065ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82498-2.c
@@ -0,0 +1,46 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+int
+f1 (int x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x >> y;
+}
+
+unsigned
+f2 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x >> y;
+}
+
+unsigned
+f3 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x << y;
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x | (1U << y);
+}
+
+unsigned
+f5 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x ^ (1U << y);
+}
+
+unsigned
+f6 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x + 2) & ~(1U << y);
+}