about summary refs log tree commit diff
diff options
context:
space:
mode:
author: liuhongt <hongtao.liu@intel.com> 2024-06-19 13:12:00 +0800
committer: liuhongt <hongtao.liu@intel.com> 2024-07-01 13:20:09 +0800
commit: 3cb204046c0db899750aee9480af4f1953a40ac3 (patch)
tree: 04cfdfa0fc3d8e1bfefbb31d943ecbf1f975f925
parent: 09737d9605521df9232d9990006c44955064f44e (diff)
download: gcc-3cb204046c0db899750aee9480af4f1953a40ac3.zip
gcc-3cb204046c0db899750aee9480af4f1953a40ac3.tar.gz
gcc-3cb204046c0db899750aee9480af4f1953a40ac3.tar.bz2
Add more splitter for mskmov with avx512 comparison.
gcc/ChangeLog:

	PR target/115517
	* config/i386/sse.md
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512): New
	define_insn_and_split.
	(*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512): Ditto.
	(*<sse2_avx2>_pmovmskb_lt_avx512): Ditto.
	(*<sse2_avx2>_pmovmskb_zext_lt_avx512): Ditto.
	(*sse2_pmovmskb_ext_lt_avx512): Ditto.
	(*pmovsk_kmask_v16qi_avx512): Ditto.
	(*pmovsk_mask_v32qi_avx512): Ditto.
	(*pmovsk_mask_cmp_<mode>_avx512): Ditto.
	(*pmovsk_ptest_<mode>_avx512): Ditto.
-rw-r--r-- gcc/config/i386/sse.md 232
1 files changed, 209 insertions, 23 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 694b4b8..3ffa188 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -10071,24 +10071,6 @@
[(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
- (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 2 "const0_operand")
- (match_operand:VI12_AVX512VL 3 "vector_all_ones_operand")
- (match_operand:<avx512fmaskmode> 1 "register_operand")))]
- "TARGET_AVX512BW && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 4)
- (not:<avx512fmaskmode> (match_dup 1)))
- (set (match_dup 0)
- (vec_merge:VI12_AVX512VL
- (match_dup 3)
- (match_dup 2)
- (match_dup 4)))]
- "operands[4] = gen_reg_rtx (<avx512fmaskmode>mode);")
-
(define_expand "<avx512>_cvtmask2<ssemodesuffix><mode>"
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
(vec_merge:VI48_AVX512VL
@@ -10128,10 +10110,10 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "*<avx512>_cvtmask2<ssemodesuffix><mode>_not"
- [(set (match_operand:VI48_AVX512VL 0 "register_operand")
- (vec_merge:VI48_AVX512VL
- (match_operand:VI48_AVX512VL 2 "const0_operand")
- (match_operand:VI48_AVX512VL 3 "vector_all_ones_operand")
+ [(set (match_operand:VI1248_AVX512VLBW 0 "register_operand")
+ (vec_merge:VI1248_AVX512VLBW
+ (match_operand:VI1248_AVX512VLBW 2 "const0_operand")
+ (match_operand:VI1248_AVX512VLBW 3 "vector_all_ones_operand")
(match_operand:<avx512fmaskmode> 1 "register_operand")))]
"TARGET_AVX512F && ix86_pre_reload_split ()"
"#"
@@ -10139,7 +10121,7 @@
[(set (match_dup 4)
(not:<avx512fmaskmode> (match_dup 1)))
(set (match_dup 0)
- (vec_merge:VI48_AVX512VL
+ (vec_merge:VI1248_AVX512VLBW
(match_dup 3)
(match_dup 2)
(match_dup 4)))]
@@ -21816,6 +21798,30 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_lt_avx512" ;; MOVMSK of a -1/0 vector selected by a compare-with-zero mask; split into MOVMSK of operand 1 itself.
+ [(set (match_operand:SI 0 "register_operand" "=r,jr")
+ (unspec:SI
+ [(subreg:VF_128_256 ;; the integer vec_merge result is viewed in the iterator's <MODE>
+ (vec_merge:<sseintvecmode>
+ (match_operand:<sseintvecmode> 3 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:<sseintvecmode> 4 "const0_operand") ;; element taken where the mask bit is clear
+ (unspec:<avx512fmaskmode> ;; mask: operand 1 compared against a zero vector
+ [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
+ (match_operand:<sseintvecmode> 2 "const0_operand")
+ (const_int 1)] ;; PCMP predicate code; presumably "less than" given the _lt name -- confirm
+ UNSPEC_PCMP)) 0)]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE"
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& reload_completed" ;; split condition: insn condition plus reload_completed
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))] ;; replacement keeps only MOVMSK on operand 1; compare and merge are dropped
+ "operands[1] = gen_lowpart (<MODE>mode, operands[1]);" ;; re-express the integer-mode operand 1 in <MODE>mode for the new insn
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt"
[(set (match_operand:DI 0 "register_operand" "=r,jr")
(any_extend:DI
@@ -21835,6 +21841,31 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "<MODE>")])
+(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_<u>ext_lt_avx512" ;; DImode-extended form: extend (MOVMSK (-1/0 vector selected by a compare-with-zero mask)); split into extend (MOVMSK operand 1).
+ [(set (match_operand:DI 0 "register_operand" "=r,jr")
+ (any_extend:DI ;; iterates over the extension codes any_extend provides (the <u> in the name)
+ (unspec:SI
+ [(subreg:VF_128_256 ;; the integer vec_merge result is viewed in the iterator's <MODE>
+ (vec_merge:<sseintvecmode>
+ (match_operand:<sseintvecmode> 3 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:<sseintvecmode> 4 "const0_operand") ;; element taken where the mask bit is clear
+ (unspec:<avx512fmaskmode> ;; mask: operand 1 compared against a zero vector
+ [(match_operand:<sseintvecmode> 1 "register_operand" "x,x")
+ (match_operand:<sseintvecmode> 2 "const0_operand")
+ (const_int 1)] ;; PCMP predicate code; presumably "less than" given the _lt name -- confirm
+ UNSPEC_PCMP)) 0)]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE" ;; the DImode destination limits this to 64-bit targets
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& reload_completed" ;; split condition: insn condition plus reload_completed
+ [(set (match_dup 0)
+ (any_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] ;; same extension, now wrapped around MOVMSK of operand 1
+ "operands[1] = gen_lowpart (<MODE>mode, operands[1]);" ;; re-express the integer-mode operand 1 in <MODE>mode for the new insn
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "<MODE>")])
+
(define_insn_and_split "*<sse>_movmsk<ssemodesuffix><avxsizesuffix>_shift"
[(set (match_operand:SI 0 "register_operand" "=r,jr")
(unspec:SI
@@ -22024,6 +22055,34 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_lt_avx512" ;; Byte-vector MOVMSK of a -1/0 vector selected by a compare-with-zero mask; split into MOVMSK of operand 1.
+ [(set (match_operand:SI 0 "register_operand" "=r,jr")
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:VI1_AVX2 4 "const0_operand") ;; element taken where the mask bit is clear
+ (unspec:<avx512fmaskmode> ;; mask: operand 1 compared against a zero vector
+ [(match_operand:VI1_AVX2 1 "register_operand" "x,x")
+ (match_operand:VI1_AVX2 2 "const0_operand")
+ (const_int 1)] ;; PCMP predicate code; presumably "less than" given the _lt name -- confirm
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2"
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds (no reload_completed requirement here)
+ [(set (match_dup 0)
+ (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK))] ;; replacement keeps only MOVMSK on operand 1
+ "" ;; no preparation code: operand 1 already has the vector mode used above
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set (attr "prefix_data16") ;; "*" when TARGET_AVX, otherwise "1"
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt"
[(set (match_operand:DI 0 "register_operand" "=r,jr")
(zero_extend:DI
@@ -22047,6 +22106,35 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*<sse2_avx2>_pmovmskb_zext_lt_avx512" ;; Zero-extended-to-DI form of byte MOVMSK over a mask-selected -1/0 vector; split into zero_extend (MOVMSK operand 1).
+ [(set (match_operand:DI 0 "register_operand" "=r,jr")
+ (zero_extend:DI
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:VI1_AVX2 4 "const0_operand") ;; element taken where the mask bit is clear
+ (unspec:<avx512fmaskmode> ;; mask: operand 1 compared against a zero vector
+ [(match_operand:VI1_AVX2 1 "register_operand" "x,x")
+ (match_operand:VI1_AVX2 2 "const0_operand")
+ (const_int 1)] ;; PCMP predicate code; presumably "less than" given the _lt name -- confirm
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE2" ;; the DImode destination limits this to 64-bit targets
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds
+ [(set (match_dup 0)
+ (zero_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] ;; zero extension kept, now around MOVMSK of operand 1
+ "" ;; no preparation code needed
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set (attr "prefix_data16") ;; "*" when TARGET_AVX, otherwise "1"
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
(define_insn_and_split "*sse2_pmovmskb_ext_lt"
[(set (match_operand:DI 0 "register_operand" "=r,jr")
(sign_extend:DI
@@ -22070,6 +22158,63 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "SI")])
+(define_insn_and_split "*sse2_pmovmskb_ext_lt_avx512" ;; Sign-extended-to-DI form of byte MOVMSK over a mask-selected -1/0 vector; split into sign_extend (MOVMSK operand 1).
+ [(set (match_operand:DI 0 "register_operand" "=r,jr")
+ (sign_extend:DI
+ (unspec:SI
+ [(vec_merge:VI1_AVX2 ;; NOTE(review): the name says sse2 only, yet the pattern iterates over VI1_AVX2 -- confirm this is intended
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:VI1_AVX2 4 "const0_operand") ;; element taken where the mask bit is clear
+ (unspec:<avx512fmaskmode> ;; mask: operand 1 compared against a zero vector
+ [(match_operand:VI1_AVX2 1 "register_operand" "x,x")
+ (match_operand:VI1_AVX2 2 "const0_operand")
+ (const_int 1)] ;; PCMP predicate code; presumably "less than" given the _lt name -- confirm
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK)))]
+ "TARGET_64BIT && TARGET_SSE2" ;; the DImode destination limits this to 64-bit targets
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds
+ [(set (match_dup 0)
+ (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_MOVMSK)))] ;; sign extension kept, now around MOVMSK of operand 1
+ "" ;; no preparation code needed
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "ssemov")
+ (set (attr "prefix_data16") ;; "*" when TARGET_AVX, otherwise "1"
+ (if_then_else
+ (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+ (set_attr "prefix" "maybe_vex")
+ (set_attr "mode" "SI")])
+
+(define_insn_and_split "*pmovsk_kmask_v16qi_avx512" ;; MOVMSK of a V16QI -1/0 vector selected by an HImode mask register; split into a zero-extension of that mask.
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI
+ [(vec_merge:V16QI
+ (match_operand:V16QI 2 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:V16QI 3 "const0_operand") ;; element taken where the mask bit is clear
+ (match_operand:HI 1 "register_operand"))] ;; the 16-bit mask itself, one bit per byte element
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()" ;; no constraints are given; ix86_pre_reload_split () presumably limits matching to before reload -- confirm
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds
+ [(set (match_dup 0)
+ (zero_extend:SI (match_dup 1)))]) ;; result is the HImode mask widened to SImode; the vector round trip disappears
+
+(define_insn_and_split "*pmovsk_mask_v32qi_avx512" ;; MOVMSK of a V32QI -1/0 vector selected by an SImode mask register; split into a plain copy of that mask.
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI
+ [(vec_merge:V32QI
+ (match_operand:V32QI 2 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:V32QI 3 "const0_operand") ;; element taken where the mask bit is clear
+ (match_operand:SI 1 "register_operand"))] ;; the 32-bit mask itself, one bit per byte element
+ UNSPEC_MOVMSK))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()" ;; no constraints are given; ix86_pre_reload_split () presumably limits matching to before reload -- confirm
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds
+ [(set (match_dup 0)
+ (match_dup 1))]) ;; mask and result are both SImode, so no extension is needed
+
;; Optimize pxor/pcmpeqb/pmovmskb/cmp 0xffff to ptest.
(define_mode_attr vi1avx2const
[(V32QI "0xffffffff") (V16QI "0xffff")])
@@ -22088,6 +22233,47 @@
(match_dup 0)]
UNSPEC_PTEST))])
+(define_insn_and_split "*pmovsk_mask_cmp_<mode>_avx512" ;; Flags compare of MOVMSK (mask-selected -1/0 byte vector) with a constant; split into a compare of the mask register itself.
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 0 "vector_all_ones_operand") ;; element taken where the mask bit is set (note: this is operand 0 here)
+ (match_operand:VI1_AVX2 3 "const0_operand") ;; element taken where the mask bit is clear
+ (match_operand:<avx512fmaskmode> 1 "register_operand"))] ;; the mask register
+ UNSPEC_MOVMSK)
+ (match_operand 2 "const_int_operand")))] ;; constant the MOVMSK result is compared with
+ "TARGET_AVX512VL && UINTVAL (operands[2]) <= <vi1avx2const>" ;; the constant must not exceed <vi1avx2const> for this vector mode
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (match_dup 1) ;; compare the mask register directly
+ (match_dup 2)))]
+ "operands[2] = gen_int_mode (UINTVAL (operands[2]), <avx512fmaskmode>mode);") ;; rebuild the constant in the mask's mode so both compare operands agree
+
+(define_insn_and_split "*pmovsk_ptest_<mode>_avx512" ;; Flags compare of MOVMSK (mask-selected -1/0 vector, mask from a PCMP of operand 0 with zero) against <vi1avx2const>; split into a PTEST of operand 0 with itself.
+ [(set (reg:CCZ FLAGS_REG)
+ (compare:CCZ
+ (unspec:SI
+ [(vec_merge:VI1_AVX2
+ (match_operand:VI1_AVX2 3 "vector_all_ones_operand") ;; element taken where the mask bit is set
+ (match_operand:VI1_AVX2 4 "const0_operand") ;; element taken where the mask bit is clear
+ (unspec:<avx512fmaskmode> ;; mask: operand 0 compared against a zero vector
+ [(match_operand:VI1_AVX2 0 "vector_operand")
+ (match_operand:VI1_AVX2 1 "const0_operand")
+ (const_int 0)] ;; PCMP predicate code 0; presumably "equal" -- confirm
+ UNSPEC_PCMP))]
+ UNSPEC_MOVMSK)
+ (match_operand 2 "const_int_operand")))]
+ "TARGET_AVX512VL && (INTVAL (operands[2]) == (int) (<vi1avx2const>))" ;; only when the constant is exactly <vi1avx2const> for this vector mode
+ "#" ;; no assembly template of its own: the insn has to be split
+ "&& 1" ;; split whenever the insn condition holds
+ [(set (reg:CCZ FLAGS_REG)
+ (unspec:CCZ [(match_dup 0) ;; PTEST takes operand 0 as both inputs
+ (match_dup 0)]
+ UNSPEC_PTEST))])
+
(define_expand "sse2_maskmovdqu"
[(set (match_operand:V16QI 0 "memory_operand")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand")