diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2017-08-21 17:15:07 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2017-08-21 17:15:07 +0200 |
commit | 453773df32204ae524683b098444c2fa4ace8033 (patch) | |
tree | 3f9aa756b02d7fb4ed312861acd83d3be7bab864 | |
parent | 2c0378f467abaa9190d52c3930ec4c825416f72f (diff) | |
download | gcc-453773df32204ae524683b098444c2fa4ace8033.zip gcc-453773df32204ae524683b098444c2fa4ace8033.tar.gz gcc-453773df32204ae524683b098444c2fa4ace8033.tar.bz2 |
re PR target/46091 (missed optimization: x86 bt/btc/bts instructions)
PR target/46091
* config/i386/i386.md (*btsq_imm): Rename from *btsq.
(*btrq_imm): Rename from *btrq.
(*btcq_imm): Rename from *btcq.
(btsc): New code attribute.
(*<btsc><mode>): New insn pattern.
(*btr<mode>): Ditto.
(*<btsc><mode>_mask): New insn_and_split pattern.
(*btr<mode>_mask): Ditto.
testsuite/ChangeLog:
PR target/46091
* gcc.target/i386/pr46091-4.c: New test.
* gcc.target/i386/pr46091-4a.c: Ditto.
* gcc.target/i386/pr46091-5.c: Ditto.
* gcc.target/i386/pr46091-5a.c: Ditto.
From-SVN: r251235
-rw-r--r-- | gcc/ChangeLog | 12 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 112 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 8 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr46091-4.c | 29 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr46091-4a.c | 31 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr46091-5.c | 29 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr46091-5a.c | 31 |
7 files changed, 239 insertions, 13 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 26b34a3..58f3cd1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,15 @@ +2017-08-21 Uros Bizjak <ubizjak@gmail.com> + + PR target/46091 + * config/i386/i386.md (*btsq_imm): Rename from *btsq. + (*btrq_imm): Rename from *btrq. + (*btcq_imm): Rename from *btcq. + (btsc): New code attribute. + (*<btsc><mode>): New insn pattern. + (*btr<mode>): Ditto. + (*<btsc><mode>_mask): New insn_and_split pattern. + (*btr<mode>_mask): Ditto. + 2017-08-21 Richard Sandiford <richard.sandiford@linaro.org> Alan Hayward <alan.hayward@arm.com> David Sherwood <david.sherwood@arm.com> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 0d04bff..f984060 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1081,6 +1081,9 @@ ;; Immediate operand constraint for shifts. (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")]) +;; Print register name in the specified mode. +(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")]) + ;; General operand predicate for integer modes. (define_mode_attr general_operand [(QI "general_operand") @@ -10998,20 +11001,103 @@ ;; Bit set / bit test instructions -;; %%% bts, btr, btc, bt. -;; In general these instructions are *slow* with variable operand -;; when applied to memory. When applied to registers, it depends -;; on the cpu implementation. They're never faster than the -;; corresponding and/ior/xor operations, so with 32-bit there's -;; no point. But in 64-bit, we can't hold the relevant immediates -;; within the instruction itself, so operating on bits in the high -;; 32-bits of a register becomes easier. +;; %%% bts, btr, btc + +;; These instructions are *slow* when applied to memory. + +(define_code_attr btsc [(ior "bts") (xor "btc")]) + +(define_insn "*<btsc><mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (any_or:SWI48 + (ashift:SWI48 (const_int 1) + (match_operand:QI 1 "register_operand" "r")) + (match_operand:SWI48 2 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT" + "<btsc>{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; Avoid useless masking of count operand. +(define_insn_and_split "*<btsc><mode>_mask" + [(set (match_operand:SWI48 0 "register_operand") + (any_or:SWI48 + (ashift:SWI48 + (const_int 1) + (subreg:QI + (and:SI + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "const_int_operand")) 0)) + (match_operand:SWI48 3 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))] + "(INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (any_or:SWI48 + (ashift:SWI48 (const_int 1) + (match_dup 1)) + (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] + "operands[1] = gen_lowpart (QImode, operands[1]);") + +(define_insn "*btr<mode>" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (and:SWI48 + (rotate:SWI48 (const_int -2) + (match_operand:QI 1 "register_operand" "r")) + (match_operand:SWI48 2 "nonimmediate_operand" "0"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_USE_BT" + "btr{<imodesuffix>}\t{%<k>1, %0|%0, %<k>1}" + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "znver1_decode" "double") + (set_attr "mode" "<MODE>")]) + +;; Avoid useless masking of count operand. +(define_insn_and_split "*btr<mode>_mask" + [(set (match_operand:SWI48 0 "register_operand") + (and:SWI48 + (rotate:SWI48 + (const_int -2) + (subreg:QI + (and:SI + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "const_int_operand")) 0)) + (match_operand:SWI48 3 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))] + "(INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1)) + == GET_MODE_BITSIZE (<MODE>mode)-1 + && can_create_pseudo_p ()" + "#" + "&& 1" + [(parallel + [(set (match_dup 0) + (and:SWI48 + (rotate:SWI48 (const_int -2) + (match_dup 1)) + (match_dup 3))) + (clobber (reg:CC FLAGS_REG))])] + "operands[1] = gen_lowpart (QImode, operands[1]);") + +;; These instructions are never faster than the corresponding +;; and/ior/xor operations when using immediate operand, so with +;; 32-bit there's no point. But in 64-bit, we can't hold the +;; relevant immediates within the instruction itself, so operating +;; on bits in the high 32-bits of a register becomes easier. ;; ;; These are slow on Nocona, but fast on Athlon64. We do require the use ;; of btrq and btcq for corner cases of post-reload expansion of absdf and ;; negdf respectively, so they can never be disabled entirely. -(define_insn "*btsq" +(define_insn "*btsq_imm" [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm") (const_int 1) (match_operand 1 "const_0_to_63_operand" "J")) @@ -11024,7 +11110,7 @@ (set_attr "znver1_decode" "double") (set_attr "mode" "DI")]) -(define_insn "*btrq" +(define_insn "*btrq_imm" [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm") (const_int 1) (match_operand 1 "const_0_to_63_operand" "J")) @@ -11037,7 +11123,7 @@ (set_attr "znver1_decode" "double") (set_attr "mode" "DI")]) -(define_insn "*btcq" +(define_insn "*btcq_imm" [(set (zero_extract:DI (match_operand:DI 0 "nonimmediate_operand" "+rm") (const_int 1) (match_operand 1 "const_0_to_63_operand" "J")) @@ -11125,6 +11211,8 @@ } }) +;; %%% bt + (define_insn "*bt<mode>" [(set (reg:CCC FLAGS_REG) (compare:CCC @@ -13148,8 +13236,6 @@ (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) -(define_mode_attr k [(SI "k") (DI "q")]) - (define_insn "*bmi2_bzhi_<mode>3_1" [(set (match_operand:SWI48 0 "register_operand" "=r") (zero_extract:SWI48 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f35db2b..006f115 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,11 @@ +2017-08-21 Uros Bizjak <ubizjak@gmail.com> + + PR target/46091 + * gcc.target/i386/pr46091-4.c: New test. + * gcc.target/i386/pr46091-4a.c: Ditto. + * gcc.target/i386/pr46091-5.c: Ditto. + * gcc.target/i386/pr46091-5a.c: Ditto. + 2017-08-21 Nathan Sidwell <nathan@acm.org> PR c++/81899 diff --git a/gcc/testsuite/gcc.target/i386/pr46091-4.c b/gcc/testsuite/gcc.target/i386/pr46091-4.c new file mode 100644 index 0000000..af2cfae --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr46091-4.c @@ -0,0 +1,29 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int test_1 (int x, int n) +{ + x &= ~(0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler "btr" } } */ + +int test_2 (int x, int n) +{ + x |= (0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler "bts" } } */ + +int test_3 (int x, int n) +{ + x ^= (0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler "btc" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr46091-4a.c b/gcc/testsuite/gcc.target/i386/pr46091-4a.c new file mode 100644 index 0000000..5874aee --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr46091-4a.c @@ -0,0 +1,31 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int test_1 (int x, int n) +{ + n &= 0x1f; + + x &= ~(0x01 << n); + + return x; +} + +int test_2 (int x, int n) +{ + n &= 0x1f; + + x |= (0x01 << n); + + return x; +} + +int test_3 (int x, int n) +{ + n &= 0x1f; + + x ^= (0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr46091-5.c b/gcc/testsuite/gcc.target/i386/pr46091-5.c new file mode 100644 index 0000000..3017029 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr46091-5.c @@ -0,0 +1,29 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +long test_1 (long x, int n) +{ + x &= ~((long)0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler "btr" } } */ + +long test_2 (long x, int n) +{ + x |= ((long)0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler "bts" } } */ + +long test_3 (long x, int n) +{ + x ^= ((long)0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler "btc" } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr46091-5a.c b/gcc/testsuite/gcc.target/i386/pr46091-5a.c new file mode 100644 index 0000000..0fa2d9b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr46091-5a.c @@ -0,0 +1,31 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ + +long test_1 (long x, int n) +{ + n &= 0x3f; + + x &= ~((long)0x01 << n); + + return x; +} + +long test_2 (long x, int n) +{ + n &= 0x3f; + + x |= ((long)0x01 << n); + + return x; +} + +long test_3 (long x, int n) +{ + n &= 0x3f; + + x ^= ((long)0x01 << n); + + return x; +} + +/* { dg-final { scan-assembler-not "and\[lq\]\[ \t\]" } } */ |