aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLingling Kong <lingling.kong@intel.com>2024-06-03 14:23:57 +0800
committerLingling Kong <lingling.kong@intel.com>2024-06-03 14:27:08 +0800
commit4371ac5292da9eadc3daccbdce2a15c029a04892 (patch)
tree297254ec7d478820bd92c71cb71023c7b19d4b52
parent28ac63180e000843ba4a82384407bcc203f27853 (diff)
downloadgcc-4371ac5292da9eadc3daccbdce2a15c029a04892.zip
gcc-4371ac5292da9eadc3daccbdce2a15c029a04892.tar.gz
gcc-4371ac5292da9eadc3daccbdce2a15c029a04892.tar.bz2
[APX NF] Support APX NF for lzcnt/tzcnt/popcnt
gcc/ChangeLog:

	* config/i386/i386.md (clz<mode>2_lzcnt_nf): New define_insn.
	(*clz<mode>2_lzcnt_falsedep_nf): Ditto.
	(<lt_zcnt>_<mode>_nf): Ditto.
	(*<lt_zcnt>_<mode>_falsedep_nf): Ditto.
	(<lt_zcnt>_hi<nf_name>): Ditto.
	(popcount<mode>2_nf): Ditto.
	(*popcount<mode>2_falsedep_nf): Ditto.
	(popcounthi2<nf_name>): Ditto.
-rw-r--r--gcc/config/i386/i386.md124
1 files changed, 113 insertions, 11 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 48ca19c..2c95395 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -20283,6 +20283,24 @@
operands[3] = gen_reg_rtx (<MODE>mode);
})
+;; APX NF variant of the lzcnt-based count-leading-zeros insn for the
+;; SWI48 modes.  Operand 0 (a register) receives the clz of operand 1
+;; (register or memory).  The pattern contains no FLAGS_REG clobber and
+;; the instruction is emitted with an {nf} prefix.  Enabled only when both
+;; TARGET_APX_NF and TARGET_LZCNT hold.
+;;
+;; Split: when TARGET_AVOID_FALSE_DEP_FOR_BMI holds, the epilogue has been
+;; completed, the function is optimized for speed, and operand 0 is not
+;; mentioned in operand 1, the insn is rewritten into a parallel that adds
+;; an UNSPEC_INSN_FALSE_DEP use of operand 0; ix86_expand_clear is called
+;; on operand 0 before the replacement is emitted.
+;; NOTE(review): ix86_expand_clear is defined elsewhere -- confirm that
+;; whatever it emits here cannot clobber the flags, since this variant is
+;; presumably selected in order to leave them intact.
+(define_insn_and_split "clz<mode>2_lzcnt_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+ "TARGET_APX_NF && TARGET_LZCNT"
+ "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (clz:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "clz<mode>2_lzcnt"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48
@@ -20306,6 +20324,18 @@
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
+;; APX NF form of the lzcnt insn that carries an explicit false-dependency
+;; marker.  Besides the clz set of operand 0 from operand 1, the pattern
+;; holds an UNSPEC_INSN_FALSE_DEP whose operand 2 is a register tied to
+;; operand 0 by the "0" constraint.  Operand 2 does not appear in the
+;; output template, so the emitted text is the same {nf} lzcnt as the
+;; plain form.  There is no FLAGS_REG clobber in the pattern.  Enabled only
+;; when both TARGET_APX_NF and TARGET_LZCNT hold.
+(define_insn "*clz<mode>2_lzcnt_falsedep_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (clz:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_APX_NF && TARGET_LZCNT"
+ "%{nf%} lzcnt{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*clz<mode>2_lzcnt_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(clz:SWI48
@@ -20412,6 +20442,25 @@
;; Version of lzcnt/tzcnt that is expanded from intrinsics. This version
;; provides operand size as output when source operand is zero.
+;; APX NF variant of the unspec-based lzcnt/tzcnt insn for the SWI48
+;; modes.  Operand 0 (a register) is set to an LT_ZCNT unspec of operand 1
+;; (register or memory); the mnemonic comes from the <lt_zcnt> iterator
+;; attribute and is emitted with an {nf} prefix.  The pattern contains no
+;; FLAGS_REG clobber and its only enabling condition is TARGET_APX_NF.
+;;
+;; Split: when TARGET_AVOID_FALSE_DEP_FOR_BMI holds, the epilogue has been
+;; completed, the function is optimized for speed, and operand 0 is not
+;; mentioned in operand 1, the insn is rewritten into a parallel that adds
+;; an UNSPEC_INSN_FALSE_DEP use of operand 0; ix86_expand_clear is called
+;; on operand 0 before the replacement is emitted.
+;; NOTE(review): ix86_expand_clear is defined elsewhere -- confirm that
+;; whatever it emits here cannot clobber the flags, since this variant is
+;; presumably selected in order to leave them intact.
+(define_insn_and_split "<lt_zcnt>_<mode>_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+ "TARGET_APX_NF"
+ "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (unspec:SWI48 [(match_dup 1)] LT_ZCNT))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "type" "<lt_zcnt_type>")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "<lt_zcnt>_<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
@@ -20436,6 +20485,19 @@
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
+;; APX NF form of the unspec-based lzcnt/tzcnt insn that carries an
+;; explicit false-dependency marker.  Besides the LT_ZCNT set of operand 0
+;; from operand 1, the pattern holds an UNSPEC_INSN_FALSE_DEP whose
+;; operand 2 is a register tied to operand 0 by the "0" constraint.
+;; Operand 2 does not appear in the output template.  There is no
+;; FLAGS_REG clobber in the pattern and its only enabling condition is
+;; TARGET_APX_NF.
+(define_insn "*<lt_zcnt>_<mode>_falsedep_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (unspec:SWI48
+ [(match_operand:SWI48 1 "nonimmediate_operand" "rm")] LT_ZCNT))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_APX_NF"
+ "%{nf%} <lt_zcnt>{<imodesuffix>}\t{%1, %0|%0, %1}"
+ [(set_attr "type" "<lt_zcnt_type>")
+ (set_attr "prefix_0f" "1")
+ (set_attr "prefix_rep" "1")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*<lt_zcnt>_<mode>_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(unspec:SWI48
@@ -20450,13 +20512,12 @@
(set_attr "prefix_rep" "1")
(set_attr "mode" "<MODE>")])
-(define_insn "<lt_zcnt>_hi"
+(define_insn "<lt_zcnt>_hi<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=r")
(unspec:HI
- [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))
- (clobber (reg:CC FLAGS_REG))]
- ""
- "<lt_zcnt>{w}\t{%1, %0|%0, %1}"
+ [(match_operand:HI 1 "nonimmediate_operand" "rm")] LT_ZCNT))]
+ "<nf_condition>"
+ "<nf_prefix><lt_zcnt>{w}\t{%1, %0|%0, %1}"
[(set_attr "type" "<lt_zcnt_type>")
(set_attr "prefix_0f" "1")
(set_attr "prefix_rep" "1")
@@ -20874,6 +20935,30 @@
[(set_attr "type" "bitmanip")
(set_attr "mode" "<MODE>")])
+;; APX NF variant of the popcnt insn for the SWI48 modes.  Operand 0 (a
+;; register) receives the popcount of operand 1 (register or memory).  The
+;; pattern contains no FLAGS_REG clobber and the instruction is emitted
+;; with an {nf} prefix.  Enabled only when both TARGET_APX_NF and
+;; TARGET_POPCNT hold.  Under TARGET_MACHO the mnemonic is printed without
+;; the operand-size suffix; otherwise <imodesuffix> is appended.
+;;
+;; Split: when TARGET_AVOID_FALSE_DEP_FOR_BMI holds, the epilogue has been
+;; completed, the function is optimized for speed, and operand 0 is not
+;; mentioned in operand 1, the insn is rewritten into a parallel that adds
+;; an UNSPEC_INSN_FALSE_DEP use of operand 0; ix86_expand_clear is called
+;; on operand 0 before the replacement is emitted.
+;; NOTE(review): ix86_expand_clear is defined elsewhere -- confirm that
+;; whatever it emits here cannot clobber the flags, since this variant is
+;; presumably selected in order to leave them intact.
+(define_insn_and_split "popcount<mode>2_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))]
+ "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+ return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+ return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+ && optimize_function_for_speed_p (cfun)
+ && !reg_mentioned_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (popcount:SWI48 (match_dup 1)))
+ (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)])]
+ "ix86_expand_clear (operands[0]);"
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "popcount<mode>2"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(popcount:SWI48
@@ -20903,6 +20988,24 @@
; False dependency happens when destination is only updated by tzcnt,
; lzcnt or popcnt. There is no false dependency when destination is
; also used in source.
+;; APX NF form of the popcnt insn that carries an explicit
+;; false-dependency marker.  Besides the popcount set of operand 0 from
+;; operand 1, the pattern holds an UNSPEC_INSN_FALSE_DEP whose operand 2 is
+;; a register tied to operand 0 by the "0" constraint.  Operand 2 does not
+;; appear in the output template.  There is no FLAGS_REG clobber in the
+;; pattern.  Enabled only when both TARGET_APX_NF and TARGET_POPCNT hold.
+;; Under TARGET_MACHO the mnemonic is printed without the operand-size
+;; suffix; otherwise <imodesuffix> is appended.
+(define_insn "*popcount<mode>2_falsedep_nf"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (popcount:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
+ (unspec [(match_operand:SWI48 2 "register_operand" "0")]
+ UNSPEC_INSN_FALSE_DEP)]
+ "TARGET_APX_NF && TARGET_POPCNT"
+{
+#if TARGET_MACHO
+ return "%{nf%} popcnt\t{%1, %0|%0, %1}";
+#else
+ return "%{nf%} popcnt{<imodesuffix>}\t{%1, %0|%0, %1}";
+#endif
+}
+ [(set_attr "prefix_rep" "1")
+ (set_attr "type" "bitmanip")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "*popcount<mode>2_falsedep"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(popcount:SWI48
@@ -21060,17 +21163,16 @@
DONE;
})
-(define_insn "popcounthi2"
+(define_insn "popcounthi2<nf_name>"
[(set (match_operand:HI 0 "register_operand" "=r")
(popcount:HI
- (match_operand:HI 1 "nonimmediate_operand" "rm")))
- (clobber (reg:CC FLAGS_REG))]
- "TARGET_POPCNT"
+ (match_operand:HI 1 "nonimmediate_operand" "rm")))]
+ "TARGET_POPCNT && <nf_condition>"
{
#if TARGET_MACHO
- return "popcnt\t{%1, %0|%0, %1}";
+ return "<nf_prefix>popcnt\t{%1, %0|%0, %1}";
#else
- return "popcnt{w}\t{%1, %0|%0, %1}";
+ return "<nf_prefix>popcnt{w}\t{%1, %0|%0, %1}";
#endif
}
[(set_attr "prefix_rep" "1")