diff options
author | Jakub Jelinek <jakub@redhat.com> | 2020-01-30 09:39:05 +0100 |
---|---|---|
committer | Jakub Jelinek <jakub@redhat.com> | 2020-01-30 09:39:05 +0100 |
commit | b285bebe6ad1e9f6416f0eb6cb69edc44db7813c (patch) | |
tree | 28a294b50851fef478ce7d6a189f210086bc0686 | |
parent | 2595f25cdaf4f16d04a1078a487b2ecc126cae29 (diff) | |
download | gcc-b285bebe6ad1e9f6416f0eb6cb69edc44db7813c.zip gcc-b285bebe6ad1e9f6416f0eb6cb69edc44db7813c.tar.gz gcc-b285bebe6ad1e9f6416f0eb6cb69edc44db7813c.tar.bz2 |
i386: Optimize popcnt followed by zero/sign extension [PR91824]
Like any other instruction with a 32-bit GPR destination operand in 64-bit
mode, popcntl also clears the upper 32 bits of the register (and other bits
too, since it can return only values from 0 to 32 inclusive).
During combine, the zero or sign extensions of it show up as a paradoxical
subreg of the popcount & 63, where 63 is the smallest (power of two) - 1 mask
that can represent all the values from 0 to 32 inclusive.
2020-01-30 Jakub Jelinek <jakub@redhat.com>
PR target/91824
* config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split.
(*popcountsi2_zext_falsedep): New define_insn.
* gcc.target/i386/pr91824-1.c: New test.
-rw-r--r-- | gcc/ChangeLog | 6 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 54 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr91824-1.c | 54 |
4 files changed, 119 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index a36e732..c86b9c2 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,9 @@ +2020-01-30 Jakub Jelinek <jakub@redhat.com> + + PR target/91824 + * config/i386/i386.md (*popcountsi2_zext): New define_insn_and_split. + (*popcountsi2_zext_falsedep): New define_insn. + 2020-01-30 Dragan Mladjenovic <dmladjenovic@wavecomp.com> * config.in: Regenerated. diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f83b370..f5c8d55 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -14563,6 +14563,60 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "*popcountsi2_zext" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (subreg:DI + (popcount:SI + (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) + (const_int 63))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT && TARGET_64BIT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %k0|%k0, %1}"; +#else + return "popcnt{l}\t{%1, %k0|%k0, %1}"; +#endif +} + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (and:DI (subreg:DI (popcount:SI (match_dup 1)) 0) (const_int 63))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + +; False dependency happens when destination is only updated by tzcnt, +; lzcnt or popcnt. There is no false dependency when destination is +; also used in source. 
+(define_insn "*popcountsi2_zext_falsedep" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI + (subreg:DI + (popcount:SI + (match_operand:SI 1 "nonimmediate_operand" "rm")) 0) + (const_int 63))) + (unspec [(match_operand:DI 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT && TARGET_64BIT" +{ +#if TARGET_MACHO + return "popcnt\t{%1, %k0|%k0, %1}"; +#else + return "popcnt{l}\t{%1, %k0|%k0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "SI")]) + (define_insn_and_split "*popcounthi2_1" [(set (match_operand:SI 0 "register_operand") (popcount:SI diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 30e804b..9b36606 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2020-01-30 Jakub Jelinek <jakub@redhat.com> + + PR target/91824 + * gcc.target/i386/pr91824-1.c: New test. + 2020-01-30 Bin Cheng <bin.cheng@linux.alibaba.com> * g++.dg/coroutines/co-await-syntax-09-convert.C: New test. 
diff --git a/gcc/testsuite/gcc.target/i386/pr91824-1.c b/gcc/testsuite/gcc.target/i386/pr91824-1.c new file mode 100644 index 0000000..0bb24e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr91824-1.c @@ -0,0 +1,54 @@ +/* PR target/91824 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mpopcnt" } */ +/* { dg-final { scan-assembler-not "cltq" } } */ + +unsigned int foo (void); + +unsigned long +f1 (unsigned int x) +{ + return __builtin_popcount (x); +} + +unsigned long +f2 (unsigned int x) +{ + return (unsigned) __builtin_popcount (x); +} + +unsigned long +f3 (unsigned int x) +{ + return __builtin_popcount (x) & 63ULL; +} + +unsigned long +f4 (unsigned int x) +{ + return __builtin_popcount (x) & 1023ULL; +} + +unsigned long +f5 (void) +{ + return __builtin_popcount (foo ()); +} + +unsigned long +f6 (void) +{ + return (unsigned) __builtin_popcount (foo ()); +} + +unsigned long +f7 (void) +{ + return __builtin_popcount (foo ()) & 63ULL; +} + +unsigned long +f8 (void) +{ + return __builtin_popcount (foo ()) & 1023ULL; +} |