diff options
author | Lingling Kong <lingling.kong@intel.com> | 2024-06-03 14:23:57 +0800 |
---|---|---|
committer | Lingling Kong <lingling.kong@intel.com> | 2024-06-03 14:27:08 +0800 |
commit | 4371ac5292da9eadc3daccbdce2a15c029a04892 (patch) | |
tree | 297254ec7d478820bd92c71cb71023c7b19d4b52 | |
parent | 28ac63180e000843ba4a82384407bcc203f27853 (diff) | |
download | gcc-4371ac5292da9eadc3daccbdce2a15c029a04892.zip gcc-4371ac5292da9eadc3daccbdce2a15c029a04892.tar.gz gcc-4371ac5292da9eadc3daccbdce2a15c029a04892.tar.bz2 |
[APX NF] Support APX NF for lzcnt/tzcnt/popcnt
gcc/ChangeLog:
* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
(<lt_zcnt>_<mode>_nf): Ditto.
(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
(<lt_zcnt>_hi<nf_name>): Ditto.
(popcount<mode>2_nf): Ditto.
(*popcount<mode>2_falsedep_nf): Ditto.
(popcounthi2<nf_name>): Ditto.
-rw-r--r-- | gcc/config/i386/i386.md | 124 |
1 files changed, 113 insertions, 11 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 48ca19c..2c95395 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -20283,6 +20283,24 @@ operands[3] = gen_reg_rtx (<MODE>mode); }) +(define_insn_and_split "clz<mode>2_lzcnt_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (clz:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "clz<mode>2_lzcnt" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20306,6 +20324,18 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*clz<mode>2_lzcnt_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (clz:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_LZCNT" + "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*clz<mode>2_lzcnt_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (clz:SWI48 @@ -20412,6 +20442,25 @@ ;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version ;; provides operand size as output when source operand is zero. +(define_insn_and_split "<lt_zcnt>_<mode>_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (unspec:SWI48 [(match_dup 1)] LT_ZCNT)) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "<lt_zcnt>_<mode>" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20436,6 +20485,19 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*<lt_zcnt>_<mode>_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (unspec:SWI48 + [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT)) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF" + "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}" + [(set_attr "type" "<lt_zcnt_type>") + (set_attr "prefix_0f" "1") + (set_attr "prefix_rep" "1") + (set_attr "mode" "<MODE>")]) + (define_insn "*<lt_zcnt>_<mode>_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (unspec:SWI48 @@ -20450,13 +20512,12 @@ (set_attr "prefix_rep" "1") (set_attr "mode" "<MODE>")]) -(define_insn "<lt_zcnt>_hi" +(define_insn "<lt_zcnt>_hi<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (unspec:HI - [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT)) - (clobber (reg:CC FLAGS_REG))] - "" - "<lt_zcnt>{w}\t{%1, %0|%0, %1}" + [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))] + "<nf_condition>" + "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}" [(set_attr "type" "<lt_zcnt_type>") (set_attr "prefix_0f" "1") (set_attr "prefix_rep" "1") @@ -20874,6 +20935,30 @@ [(set_attr "type" "bitmanip") (set_attr "mode" "<MODE>")]) +(define_insn_and_split "popcount<mode>2_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm")))] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; +#else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed + && optimize_function_for_speed_p (cfun) + && !reg_mentioned_p (operands[0], operands[1])" + [(parallel + [(set (match_dup 0) + (popcount:SWI48 (match_dup 1))) + (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])] + "ix86_expand_clear (operands[0]);" + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn_and_split "popcount<mode>2" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -20903,6 +20988,24 @@ ; False dependency happens when destination is only updated by tzcnt, ; lzcnt or popcnt. There is no false dependency when destination is ; also used in source. +(define_insn "*popcount<mode>2_falsedep_nf" + [(set (match_operand:SWI48 0 "register_operand" "=r") + (popcount:SWI48 + (match_operand:SWI48 1 "nonimmediate_operand" "rm"))) + (unspec [(match_operand:SWI48 2 "register_operand" "0")] + UNSPEC_INSN_FALSE_DEP)] + "TARGET_APX_NF && TARGET_POPCNT" +{ +#if TARGET_MACHO + return "%{nf%} popcnt\t{%1, %0|%0, %1}"; +#else + return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}"; +#endif +} + [(set_attr "prefix_rep" "1") + (set_attr "type" "bitmanip") + (set_attr "mode" "<MODE>")]) + (define_insn "*popcount<mode>2_falsedep" [(set (match_operand:SWI48 0 "register_operand" "=r") (popcount:SWI48 @@ -21060,17 +21163,16 @@ DONE; }) -(define_insn "popcounthi2" +(define_insn "popcounthi2<nf_name>" [(set (match_operand:HI 0 "register_operand" "=r") (popcount:HI - (match_operand:HI 1 "nonimmediate_operand" "rm"))) - (clobber (reg:CC FLAGS_REG))] - "TARGET_POPCNT" + (match_operand:HI 1 "nonimmediate_operand" "rm")))] + "TARGET_POPCNT && <nf_condition>" { #if TARGET_MACHO - return "popcnt\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt\t{%1, %0|%0, %1}"; #else - return "popcnt{w}\t{%1, %0|%0, %1}"; + return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}"; #endif } [(set_attr "prefix_rep" "1") |