diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2014-08-18 20:00:52 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2014-08-18 20:00:52 +0200 |
commit | 374f5bf801f8487193ddd1e73027af1df8221f3e (patch) | |
tree | 7fec0943b3fc05b76acaabb85db4e1e275c7040e /gcc | |
parent | d6b0bb1c4e7c811893ef7d97b3c446bcced1d82c (diff) | |
download | gcc-374f5bf801f8487193ddd1e73027af1df8221f3e.zip gcc-374f5bf801f8487193ddd1e73027af1df8221f3e.tar.gz gcc-374f5bf801f8487193ddd1e73027af1df8221f3e.tar.bz2 |
re PR target/62011 (False Data Dependency in popcnt instruction)
PR target/62011
* config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI):
New tune flag.
* config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BMI): New define.
* config/i386/i386.md (unspec) <UNSPEC_INSN_FALSE_DEP>: New unspec.
(ffs<mode>2): Do not expand with tzcnt for
TARGET_AVOID_FALSE_DEP_FOR_BMI.
(ffssi2_no_cmove): Ditto.
(*tzcnt<mode>_1): Disable for TARGET_AVOID_FALSE_DEP_FOR_BMI.
(ctz<mode>2): New expander.
(*ctz<mode>2_falsedep_1): New insn_and_split pattern.
(*ctz<mode>2_falsedep): New insn.
(*ctz<mode>2): Rename from ctz<mode>2.
(clz<mode>2_lzcnt): New expander.
(*clz<mode>2_lzcnt_falsedep_1): New insn_and_split pattern.
(*clz<mode>2_lzcnt_falsedep): New insn.
(*clz<mode>2_lzcnt): Rename from clz<mode>2_lzcnt.
(popcount<mode>2): New expander.
(*popcount<mode>2_falsedep_1): New insn_and_split pattern.
(*popcount<mode>2_falsedep): New insn.
(*popcount<mode>2): Rename from popcount<mode>2.
(*popcount<mode>2_cmp): Remove.
(*popcountsi2_cmp_zext): Ditto.
From-SVN: r214112
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 26 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 163 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 5 |
4 files changed, 159 insertions, 37 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 629ae5f..8674f94 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,29 @@ +2014-08-18 Uros Bizjak <ubizjak@gmail.com> + + PR target/62011 + * config/i386/x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): + New tune flag. + * config/i386/i386.h (TARGET_AVOID_FALSE_DEP_FOR_BMI): New define. + * config/i386/i386.md (unspec) <UNSPEC_INSN_FALSE_DEP>: New unspec. + (ffs<mode>2): Do not expand with tzcnt for + TARGET_AVOID_FALSE_DEP_FOR_BMI. + (ffssi2_no_cmove): Ditto. + (*tzcnt<mode>_1): Disable for TARGET_AVOID_FALSE_DEP_FOR_BMI. + (ctz<mode>2): New expander. + (*ctz<mode>2_falsedep_1): New insn_and_split pattern. + (*ctz<mode>2_falsedep): New insn. + (*ctz<mode>2): Rename from ctz<mode>2. + (clz<mode>2_lzcnt): New expander. + (*clz<mode>2_lzcnt_falsedep_1): New insn_and_split pattern. + (*clz<mode>2_lzcnt_falsedep): New insn. + (*clz<mode>2): Rename from ctz<mode>2. + (popcount<mode>2): New expander. + (*popcount<mode>2_falsedep_1): New insn_and_split pattern. + (*popcount<mode>2_falsedep): New insn. + (*popcount<mode>2): Rename from ctz<mode>2. + (*popcount<mode>2_cmp): Remove. + (*popcountsi2_cmp_zext): Ditto. + 2014-08-18 Ajit Agarwal <ajitkum@xilinx.com> * config/microblaze/microblaze.c (microblaze_elf_asm_cdtor): New. diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index c2f0cee..ec6ed25 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -473,6 +473,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS] #define TARGET_ADJUST_UNROLL \ ix86_tune_features[X86_TUNE_ADJUST_UNROLL] +#define TARGET_AVOID_FALSE_DEP_FOR_BMI \ + ix86_tune_features[X86_TUNE_AVOID_FALSE_DEP_FOR_BMI] /* Feature tests against the various architecture variations. 
*/ enum ix86_arch_indices { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 9bb7e16..4749b74 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -112,6 +112,7 @@ UNSPEC_XBEGIN_ABORT UNSPEC_STOS UNSPEC_PEEPSIB + UNSPEC_INSN_FALSE_DEP ;; For SSE/MMX support: UNSPEC_FIX_NOTRUNC @@ -12197,7 +12198,8 @@ DONE; } - flags_mode = TARGET_BMI ? CCCmode : CCZmode; + flags_mode + = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode; operands[2] = gen_reg_rtx (<MODE>mode); operands[3] = gen_rtx_REG (flags_mode, FLAGS_REG); @@ -12223,7 +12225,8 @@ (parallel [(set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))) (clobber (reg:CC FLAGS_REG))])] { - enum machine_mode flags_mode = TARGET_BMI ? CCCmode : CCZmode; + enum machine_mode flags_mode + = (TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI) ? CCCmode : CCZmode; operands[3] = gen_lowpart (QImode, operands[2]); operands[4] = gen_rtx_REG (flags_mode, FLAGS_REG); @@ -12238,7 +12241,7 @@ (const_int 0))) (set (match_operand:SWI48 0 "register_operand" "=r") (ctz:SWI48 (match_dup 1)))] - "TARGET_BMI" + "TARGET_BMI && !TARGET_AVOID_FALSE_DEP_FOR_BMI" "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" [(set_attr "type" "alu1") (set_attr "prefix_0f" "1") @@ -12259,7 +12262,52 @@ (set_attr "btver2_decode" "double") (set_attr "mode" "<MODE>")]) -(define_insn "ctz<mode>2" +(define_expand "ctz<mode>2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (ctz:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])]) + +(define_insn_and_split "*ctz<mode>2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "(TARGET_BMI || TARGET_GENERIC) + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (ctz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] 
UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*ctz<mode>2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (ctz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "" +{ + if (TARGET_BMI) + return "tzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; + else if (TARGET_GENERIC) + /* tzcnt expands to 'rep bsf' and we can use it even if !TARGET_BMI. */ + return "rep%; bsf{<imodesuffix>}\t{%1, %0|%0, %1}"; + else + gcc_unreachable (); +} + [(set_attr "type" "alu1") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + +(define_insn "*ctz<mode>2" [(set (match_operand:SWI248 0 "register_operand" "=r") (ctz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] @@ -12306,7 +12354,44 @@ operands[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode)-1); }) -(define_insn "clz<mode>2_lzcnt" +(define_expand "clz<mode>2_lzcnt" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (clz:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_LZCNT") + +(define_insn_and_split "*clz<mode>2_lzcnt_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*clz<mode>2_lzcnt_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 
2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))] + "TARGET_LZCNT" + "lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + +(define_insn "*clz<mode>2_lzcnt" [(set (match_operand:SWI248 0 "register_operand" "=r") (clz:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) (clobber (reg:CC FLAGS_REG))] @@ -12589,10 +12674,36 @@ (set_attr "prefix_0f" "1") (set_attr "mode" "HI")]) -(define_insn "popcount<mode>2" - [(set (match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) +(define_expand "popcount<mode>2" + [(parallel + [(set (match_operand:SWI248 0 "register_operand") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand"))) + (clobber (reg:CC FLAGS_REG))])] + "TARGET_POPCNT") + +(define_insn_and_split "*popcount<mode>2_falsedep_1" + [(set (match_operand:SWI48 0 "register_operand" "=&r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT + && TARGET_AVOID_FALSE_DEP_FOR_BMI && optimize_function_for_speed_p (cfun)" + "#" + "&& reload_completed" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP) + (clobber (reg:CC FLAGS_REG))])] + "ix86_expand_clear (operands[0]);") + +(define_insn "*popcount<mode>2_falsedep" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP) (clobber (reg:CC FLAGS_REG))] "TARGET_POPCNT" { @@ -12606,15 +12717,12 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcount<mode>2_cmp" - [(set (reg FLAGS_REG) - (compare - (popcount:SWI248 - (match_operand:SWI248 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set 
(match_operand:SWI248 0 "register_operand" "=r") - (popcount:SWI248 (match_dup 1)))] - "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" +(define_insn "*popcount<mode>2" + [(set (match_operand:SWI248 0 "register_operand" "=r") + (popcount:SWI248 + (match_operand:SWI248 1 "nonimmediate_operand" "rm"))) + (clobber (reg:CC FLAGS_REG))] + "TARGET_POPCNT" { #if TARGET_MACHO return "popcnt\t{%1, %0|%0, %1}"; @@ -12626,25 +12734,6 @@ (set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) -(define_insn "*popcountsi2_cmp_zext" - [(set (reg FLAGS_REG) - (compare - (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")) - (const_int 0))) - (set (match_operand:DI 0 "register_operand" "=r") - (zero_extend:DI(popcount:SI (match_dup 1))))] - "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)" -{ -#if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; -#else - return "popcnt{l}\t{%1, %0|%0, %1}"; -#endif -} - [(set_attr "prefix_rep" "1") - (set_attr "type" "bitmanip") - (set_attr "mode" "SI")]) - (define_expand "bswapdi2" [(set (match_operand:DI 0 "register_operand") (bswap:DI (match_operand:DI 1 "nonimmediate_operand")))] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index cb44dc3..215c63c 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -509,6 +509,11 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6) DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode", m_K8) +/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. */ +DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", + m_SANDYBRIDGE | m_HASWELL | m_GENERIC) + /*****************************************************************************/ /* This never worked well before. */ /*****************************************************************************/ |