about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Jakub Jelinek <jakub@redhat.com> 2024-12-07 11:40:12 +0100
committer Jakub Jelinek <jakub@gcc.gnu.org> 2024-12-07 11:40:12 +0100
commit 4abcf4ad38a4081f02ed09aed7892a3a6af61cbb (patch)
tree 94b2bcfd114b3960a2879b18419d6b8f769c10c7 /gcc
parent b7dd0d976022c5ba20d9d676e2f684614231eb72 (diff)
download gcc-4abcf4ad38a4081f02ed09aed7892a3a6af61cbb.zip
gcc-4abcf4ad38a4081f02ed09aed7892a3a6af61cbb.tar.gz
gcc-4abcf4ad38a4081f02ed09aed7892a3a6af61cbb.tar.bz2
i386: x r<< (c - y) to x r>> y etc. optimization [PR117930]
The following patch optimizes x r<< (c - y) to x r>> y, x r>> (c - y) to x r<< y, x r<< (c + y) to x r<< y and x r>> (c + y) to x r>> y if c is a multiple of x's bitsize.

2024-12-07  Jakub Jelinek  <jakub@redhat.com>

PR target/117930
* config/i386/i386.md (crotate): New define_code_attr.
(*<insn><mode>3_add, *<insn><mode>3_add_1, *<insn><mode>3_sub, *<insn><mode>3_sub_1): New define_insn_and_split patterns plus following define_split for constant first input operand.

* gcc.target/i386/pr117930.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.md 141
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr117930.c 118
2 files changed, 259 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ec816be..6edcb6d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1079,6 +1079,9 @@
;; Base name for insn mnemonic: "rol" for rotate, "ror" for rotatert.
(define_code_attr rotate [(rotate "rol") (rotatert "ror")])
+;; Counter rotate: for each rotate code, the name of the opposite-direction rotate code.
+(define_code_attr crotate [(rotate "rotatert") (rotatert "rotate")])
+
;; Code iterator covering the abs and neg operators.
(define_code_iterator absneg [abs neg])
@@ -18216,6 +18219,144 @@
(any_rotate:SWI (match_dup 4) (match_dup 2)))]
"operands[4] = gen_reg_rtx (<MODE>mode);")
+(define_insn_and_split "*<insn><mode>3_add" ;; Rotate counted by the QImode subreg of (y + C); rewritten to the same rotate counted by y alone.
+ [(set (match_operand:SWI 0 "nonimmediate_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "nonimmediate_operand") ;; value being rotated
+ (subreg:QI ;; count is the QImode subreg of the sum below
+ (plus
+ (match_operand 2 "int_nonimmediate_operand") ;; y, the variable part of the count
+ (match_operand 3 "const_int_operand")) 0))) ;; C, the constant part of the count
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+ && ix86_pre_reload_split ()" ;; middle test: C must have no bits set under the (mode bit size - 1) mask
+ "#"
+ "&& 1"
+ [(parallel ;; replacement keeps the original rotate code and drops C from the count
+ [(set (match_dup 0)
+ (any_rotate:SWI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); /* pass y through force_reg in its own mode */
+ operands[2] = gen_lowpart (QImode, operands[2]); /* then use its QImode low part as the new count */
+})
+
+(define_split ;; Same (y + C) count simplification for a constant rotated value.
+ [(set (match_operand:SWI 0 "register_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "const_int_operand") ;; constant value being rotated
+ (subreg:QI
+ (plus
+ (match_operand 2 "int248_register_operand") ;; y, the variable part of the count
+ (match_operand 3 "const_int_operand")) 0)))] ;; C, the constant part of the count
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" ;; C must have no bits set under the (mode bit size - 1) mask
+ [(set (match_dup 4) (match_dup 1)) ;; first copy the constant into operand 4
+ (set (match_dup 0)
+ (any_rotate:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))] ;; then rotate operand 4 by the QImode subreg of y, with C dropped
+ "operands[4] = gen_reg_rtx (<MODE>mode);") ;; operand 4 is obtained from gen_reg_rtx in <MODE>mode
+
+(define_insn_and_split "*<insn><mode>3_add_1" ;; Variant where the count is a QImode (y + C) directly, with no subreg.
+ [(set (match_operand:SWI 0 "nonimmediate_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "nonimmediate_operand") ;; value being rotated
+ (plus:QI
+ (match_operand:QI 2 "nonimmediate_operand") ;; y, the variable part of the count
+ (match_operand:QI 3 "const_int_operand")))) ;; C, the constant part of the count
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+ && ix86_pre_reload_split ()" ;; middle test: C must have no bits set under the (mode bit size - 1) mask
+ "#"
+ "&& 1"
+ [(parallel ;; replacement keeps the original rotate code and drops C from the count
+ [(set (match_dup 0)
+ (any_rotate:SWI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);") ;; y is passed through force_reg in its own mode before use as the count
+
+(define_split ;; QImode (y + C) count simplification for a constant rotated value.
+ [(set (match_operand:SWI 0 "register_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "const_int_operand") ;; constant value being rotated
+ (plus:QI
+ (match_operand:QI 2 "register_operand") ;; y, the variable part of the count
+ (match_operand:QI 3 "const_int_operand"))))] ;; C, the constant part of the count
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" ;; C must have no bits set under the (mode bit size - 1) mask
+ [(set (match_dup 4) (match_dup 1)) ;; first copy the constant into operand 4
+ (set (match_dup 0)
+ (any_rotate:SWI (match_dup 4) (match_dup 2)))] ;; then rotate operand 4 by y alone
+ "operands[4] = gen_reg_rtx (<MODE>mode);") ;; operand 4 is obtained from gen_reg_rtx in <MODE>mode
+
+(define_insn_and_split "*<insn><mode>3_sub" ;; Rotate counted by the QImode subreg of (C - y); rewritten to the counter rotate counted by y.
+ [(set (match_operand:SWI 0 "nonimmediate_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "nonimmediate_operand") ;; value being rotated
+ (subreg:QI ;; count is the QImode subreg of the difference below
+ (minus
+ (match_operand 3 "const_int_operand") ;; C, the constant minuend
+ (match_operand 2 "int_nonimmediate_operand")) 0))) ;; y, the variable subtrahend
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+ && ix86_pre_reload_split ()" ;; middle test: C must have no bits set under the (mode bit size - 1) mask
+ "#"
+ "&& 1"
+ [(parallel ;; replacement switches to <crotate>, the opposite rotate code, counted by y
+ [(set (match_dup 0)
+ (<crotate>:SWI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+{
+ operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); /* pass y through force_reg in its own mode */
+ operands[2] = gen_lowpart (QImode, operands[2]); /* then use its QImode low part as the new count */
+})
+
+(define_split ;; Same (C - y) count simplification for a constant rotated value.
+ [(set (match_operand:SWI 0 "register_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "const_int_operand") ;; constant value being rotated
+ (subreg:QI
+ (minus
+ (match_operand 3 "const_int_operand") ;; C, the constant minuend
+ (match_operand 2 "int248_register_operand")) 0)))] ;; y, the variable subtrahend
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" ;; C must have no bits set under the (mode bit size - 1) mask
+ [(set (match_dup 4) (match_dup 1)) ;; first copy the constant into operand 4
+ (set (match_dup 0)
+ (<crotate>:SWI (match_dup 4) (subreg:QI (match_dup 2) 0)))] ;; then counter-rotate operand 4 by the QImode subreg of y
+ "operands[4] = gen_reg_rtx (<MODE>mode);") ;; operand 4 is obtained from gen_reg_rtx in <MODE>mode
+
+(define_insn_and_split "*<insn><mode>3_sub_1" ;; Variant where the count is a QImode (C - y) directly, with no subreg.
+ [(set (match_operand:SWI 0 "nonimmediate_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "nonimmediate_operand") ;; value being rotated
+ (minus:QI
+ (match_operand:QI 3 "const_int_operand") ;; C, the constant minuend
+ (match_operand:QI 2 "nonimmediate_operand")))) ;; y, the variable subtrahend
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0
+ && ix86_pre_reload_split ()" ;; middle test: C must have no bits set under the (mode bit size - 1) mask
+ "#"
+ "&& 1"
+ [(parallel ;; replacement switches to <crotate>, the opposite rotate code, counted by y
+ [(set (match_dup 0)
+ (<crotate>:SWI (match_dup 1) (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);") ;; y is passed through force_reg in its own mode before use as the count
+
+(define_split ;; QImode (C - y) count simplification for a constant rotated value.
+ [(set (match_operand:SWI 0 "register_operand")
+ (any_rotate:SWI
+ (match_operand:SWI 1 "const_int_operand") ;; constant value being rotated
+ (minus:QI
+ (match_operand:QI 3 "const_int_operand") ;; C, the constant minuend
+ (match_operand:QI 2 "register_operand"))))] ;; y, the variable subtrahend
+ "(INTVAL (operands[3]) & (<MODE_SIZE> * BITS_PER_UNIT - 1)) == 0" ;; C must have no bits set under the (mode bit size - 1) mask
+ [(set (match_dup 4) (match_dup 1)) ;; first copy the constant into operand 4
+ (set (match_dup 0)
+ (<crotate>:SWI (match_dup 4) (match_dup 2)))] ;; then counter-rotate operand 4 by y alone
+ "operands[4] = gen_reg_rtx (<MODE>mode);") ;; operand 4 is obtained from gen_reg_rtx in <MODE>mode
+
;; Implement rotation using two double-precision
;; shift instructions and a scratch register.
diff --git a/gcc/testsuite/gcc.target/i386/pr117930.c b/gcc/testsuite/gcc.target/i386/pr117930.c
new file mode 100644
index 0000000..e8dec92
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117930.c
@@ -0,0 +1,118 @@
+/* PR target/117930 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { scan-assembler-not "sub\[bwlq\]\t" } } */
+/* { dg-final { scan-assembler-not "add\[bwlq\]\t" } } */
+/* { dg-final { scan-assembler-not "lea\[lq\]\t" } } */
+
+static inline /* Left-rotate idiom built from two plain shifts; the 31 mask presumably assumes a 32-bit unsigned. */
+unsigned lrotate (unsigned x, int t)
+{
+ unsigned tl = x << t; /* x shifted left by t */
+ unsigned th = x >> (-t & 31); /* x shifted right by (-t & 31) */
+ return tl | th; /* combine both parts */
+}
+
+static inline /* Right-rotate idiom built from two plain shifts; the 31 mask presumably assumes a 32-bit unsigned. */
+unsigned rrotate (unsigned x, int t)
+{
+ unsigned tl = x >> t; /* x shifted right by t */
+ unsigned th = x << (-t & 31); /* x shifted left by (-t & 31) */
+ return tl | th; /* combine both parts */
+}
+
+unsigned /* Case x r<< (c - y) with c = 32. */
+f1 (unsigned x, int t)
+{
+ return lrotate (x, 32 - t); /* per the commit description, expected to become x r>> t */
+}
+
+unsigned /* Case x r<< (c - y) with c = 64. */
+f2 (unsigned x, int t)
+{
+ return lrotate (x, 64 - t); /* per the commit description, expected to become x r>> t */
+}
+
+unsigned /* Case x r<< (c + y) with c = 32. */
+f3 (unsigned x, int t)
+{
+ return lrotate (x, 32 + t); /* per the commit description, expected to become x r<< t */
+}
+
+unsigned /* Case x r<< (c + y) with c = 64. */
+f4 (unsigned x, int t)
+{
+ return lrotate (x, 64 + t); /* per the commit description, expected to become x r<< t */
+}
+
+unsigned /* Case x r>> (c - y) with c = 32. */
+f5 (unsigned x, int t)
+{
+ return rrotate (x, 32 - t); /* per the commit description, expected to become x r<< t */
+}
+
+unsigned /* Case x r>> (c - y) with c = 64. */
+f6 (unsigned x, int t)
+{
+ return rrotate (x, 64 - t); /* per the commit description, expected to become x r<< t */
+}
+
+unsigned /* Case x r>> (c + y) with c = 32. */
+f7 (unsigned x, int t)
+{
+ return rrotate (x, 32 + t); /* per the commit description, expected to become x r>> t */
+}
+
+unsigned /* Case x r>> (c + y) with c = 64. */
+f8 (unsigned x, int t)
+{
+ return rrotate (x, 64 + t); /* per the commit description, expected to become x r>> t */
+}
+
+unsigned /* Constant rotated value: K r<< (c - y) with c = 32. */
+f9 (int t)
+{
+ return lrotate (0xdeadbeefU, 32 - t); /* per the commit description, expected to become K r>> t */
+}
+
+unsigned /* Constant rotated value: K r<< (c - y) with c = 64. */
+f10 (int t)
+{
+ return lrotate (0xdeadbeefU, 64 - t); /* per the commit description, expected to become K r>> t */
+}
+
+unsigned /* Constant rotated value: K r<< (c + y) with c = 32. */
+f11 (int t)
+{
+ return lrotate (0xdeadbeefU, 32 + t); /* per the commit description, expected to become K r<< t */
+}
+
+unsigned /* Constant rotated value: K r<< (c + y) with c = 64. */
+f12 (int t)
+{
+ return lrotate (0xdeadbeefU, 64 + t); /* per the commit description, expected to become K r<< t */
+}
+
+unsigned /* Constant rotated value: K r>> (c - y) with c = 32. */
+f13 (int t)
+{
+ return rrotate (0xdeadbeefU, 32 - t); /* per the commit description, expected to become K r<< t */
+}
+
+unsigned /* Constant rotated value: K r>> (c - y) with c = 64. */
+f14 (int t)
+{
+ return rrotate (0xdeadbeefU, 64 - t); /* per the commit description, expected to become K r<< t */
+}
+
+unsigned /* Constant rotated value: K r>> (c + y) with c = 32. */
+f15 (int t)
+{
+ return rrotate (0xdeadbeefU, 32 + t); /* per the commit description, expected to become K r>> t */
+}
+
+unsigned /* Constant rotated value: K r>> (c + y) with c = 64. */
+f16 (int t)
+{
+ return rrotate (0xdeadbeefU, 64 + t); /* per the commit description, expected to become K r>> t */
+}