From 15269a6b13da7f72bf78bbdd0bf7cd579b4b620c Mon Sep 17 00:00:00 2001 From: liuhongt Date: Tue, 29 Aug 2023 12:51:19 +0800 Subject: Refactor vector HF/BF mode iterators and patterns. gcc/ChangeLog: * config/i386/sse.md (_blendm): Merge VF_AVX512HFBFVL into VI12HFBF_AVX512VL. (VF_AVX512HFBF16): Renamed to VHFBF. (VF_AVX512FP16VL): Renamed to VHF_AVX512VL. (VF_AVX512FP16): Removed. (div3): Adjust VF_AVX512FP16VL to VHF_AVX512VL. (avx512fp16_rcp2): Ditto. (rsqrt2): Ditto. (_rsqrt2): Ditto. (vcond): Ditto. (vcond): Ditto. (_fmaddc__mask1): Ditto. (_fmaddc__maskz): Ditto. (_fcmaddc__mask1): Ditto. (_fcmaddc__maskz): Ditto. (cmla4): Ditto. (fma__fadd_fmul): Ditto. (fma__fadd_fcmul): Ditto. (fma___fma_zero): Ditto. (fma__fmaddc_bcst): Ditto. (fma__fcmaddc_bcst): Ditto. (___mask): Ditto. (cmul3): Ditto. (__): Ditto. (vec_unpacks_lo_): Ditto. (vec_unpacks_hi_): Ditto. (vec_unpack_fix_trunc_lo_): Ditto. (vec_unpack_fix_trunc_hi_): Ditto. (*vec_extract_0): Ditto. (*_cmp3): Extend to V48H_AVX512VL. 
--- gcc/config/i386/sse.md | 238 ++++++++++++++++++++++--------------------------- 1 file changed, 108 insertions(+), 130 deletions(-) (limited to 'gcc') diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 192e746..e282d97 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -459,18 +459,10 @@ (define_mode_iterator VF1_AVX512VL [V16SF (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")]) -(define_mode_iterator VF_AVX512FP16 - [V32HF V16HF V8HF]) +(define_mode_iterator VHFBF + [V32HF V16HF V8HF V32BF V16BF V8BF]) -(define_mode_iterator VF_AVX512HFBF16 - [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16") - (V8HF "TARGET_AVX512FP16") V32BF V16BF V8BF]) - -(define_mode_iterator VF_AVX512HFBFVL - [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL") - V32BF (V16BF "TARGET_AVX512VL") (V8BF "TARGET_AVX512VL")]) - -(define_mode_iterator VF_AVX512FP16VL +(define_mode_iterator VHF_AVX512VL [V32HF (V16HF "TARGET_AVX512VL") (V8HF "TARGET_AVX512VL")]) ;; All vector integer modes @@ -1624,29 +1616,15 @@ (set_attr "mode" "")]) (define_insn "_blendm" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v,v") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm,vm") - (match_operand:VI12_AVX512VL 1 "nonimm_or_0_operand" "0C,v") - (match_operand: 3 "register_operand" "Yk,Yk")))] - "TARGET_AVX512BW" - "@ - vmovdqu\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2} - vpblendm\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" - [(set_attr "type" "ssemov") - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - -(define_insn "_blendm" - [(set (match_operand:VF_AVX512HFBFVL 0 "register_operand" "=v,v") - (vec_merge:VF_AVX512HFBFVL - (match_operand:VF_AVX512HFBFVL 2 "nonimmediate_operand" "vm,vm") - (match_operand:VF_AVX512HFBFVL 1 "nonimm_or_0_operand" "0C,v") + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v,v") + (vec_merge:VI12HFBF_AVX512VL + (match_operand:VI12HFBF_AVX512VL 2 "nonimmediate_operand" "vm,vm") + 
(match_operand:VI12HFBF_AVX512VL 1 "nonimm_or_0_operand" "0C,v") (match_operand: 3 "register_operand" "Yk,Yk")))] "TARGET_AVX512BW" "@ vmovdqu\t{%2, %0%{%3%}%N1|%0%{%3%}%N1, %2} - vpblendmw\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" + vpblendm\t{%2, %1, %0%{%3%}|%0%{%3%}, %1, %2}" [(set_attr "type" "ssemov") (set_attr "prefix" "evex") (set_attr "mode" "")]) @@ -2448,10 +2426,10 @@ "TARGET_SSE2") (define_expand "div3" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (div:VF_AVX512FP16VL - (match_operand:VF_AVX512FP16VL 1 "register_operand") - (match_operand:VF_AVX512FP16VL 2 "vector_operand")))] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (div:VHF_AVX512VL + (match_operand:VHF_AVX512VL 1 "register_operand") + (match_operand:VHF_AVX512VL 2 "vector_operand")))] "TARGET_AVX512FP16" { /* Transform HF vector div to vector mul/rcp. */ @@ -2568,9 +2546,9 @@ (set_attr "mode" "SF")]) (define_insn "avx512fp16_rcp2" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "vm")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "vm")] UNSPEC_RCP))] "TARGET_AVX512FP16" "vrcpph\t{%1, %0|%0, %1}" @@ -2731,9 +2709,9 @@ }) (define_expand "rsqrt2" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand")] UNSPEC_RSQRT))] "TARGET_AVX512FP16") @@ -2748,9 +2726,9 @@ (set_attr "mode" "")]) (define_insn "_rsqrt2" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=v") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand" "vBm")] UNSPEC_RSQRT))] + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=v") + (unspec:VHF_AVX512VL + 
[(match_operand:VHF_AVX512VL 1 "vector_operand" "vBm")] UNSPEC_RSQRT))] "TARGET_AVX512FP16" "vrsqrtph\t{%1, %0|%0, %1}" [(set_attr "type" "sse") @@ -3909,8 +3887,8 @@ [(set (match_operand: 0 "register_operand") (not: (unspec: - [(match_operand:V48_AVX512VL 1 "register_operand") - (match_operand:V48_AVX512VL 2 "nonimmediate_operand") + [(match_operand:V48H_AVX512VL 1 "register_operand") + (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") (match_operand:SI 3 "" "n")] UNSPEC_PCMP)))] "TARGET_AVX512F && ix86_pre_reload_split ()" @@ -4594,13 +4572,13 @@ }) (define_expand "vcond" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (if_then_else:VF_AVX512FP16VL + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (if_then_else:VHF_AVX512VL (match_operator 3 "" - [(match_operand:VF_AVX512FP16VL 4 "vector_operand") - (match_operand:VF_AVX512FP16VL 5 "vector_operand")]) - (match_operand:VF_AVX512FP16VL 1 "general_operand") - (match_operand:VF_AVX512FP16VL 2 "general_operand")))] + [(match_operand:VHF_AVX512VL 4 "vector_operand") + (match_operand:VHF_AVX512VL 5 "vector_operand")]) + (match_operand:VHF_AVX512VL 1 "general_operand") + (match_operand:VHF_AVX512VL 2 "general_operand")))] "TARGET_AVX512FP16" { bool ok = ix86_expand_fp_vcond (operands); @@ -4612,8 +4590,8 @@ [(set (match_operand: 0 "register_operand") (if_then_else: (match_operator 3 "" - [(match_operand:VF_AVX512FP16VL 4 "vector_operand") - (match_operand:VF_AVX512FP16VL 5 "vector_operand")]) + [(match_operand:VHF_AVX512VL 4 "vector_operand") + (match_operand:VHF_AVX512VL 5 "vector_operand")]) (match_operand: 1 "general_operand") (match_operand: 2 "general_operand")))] "TARGET_AVX512FP16" @@ -6513,10 +6491,10 @@ (V8HF "avx512vl_loadv4sf")]) (define_expand "_fmaddc__mask1" - [(match_operand:VF_AVX512FP16VL 0 "register_operand") - (match_operand:VF_AVX512FP16VL 1 "") - (match_operand:VF_AVX512FP16VL 2 "") - (match_operand:VF_AVX512FP16VL 3 "") + [(match_operand:VHF_AVX512VL 0 
"register_operand") + (match_operand:VHF_AVX512VL 1 "") + (match_operand:VHF_AVX512VL 2 "") + (match_operand:VHF_AVX512VL 3 "") (match_operand: 4 "register_operand")] "TARGET_AVX512FP16 && " { @@ -6543,10 +6521,10 @@ }) (define_expand "_fmaddc__maskz" - [(match_operand:VF_AVX512FP16VL 0 "register_operand") - (match_operand:VF_AVX512FP16VL 1 "") - (match_operand:VF_AVX512FP16VL 2 "") - (match_operand:VF_AVX512FP16VL 3 "") + [(match_operand:VHF_AVX512VL 0 "register_operand") + (match_operand:VHF_AVX512VL 1 "") + (match_operand:VHF_AVX512VL 2 "") + (match_operand:VHF_AVX512VL 3 "") (match_operand: 4 "register_operand")] "TARGET_AVX512FP16 && " { @@ -6557,10 +6535,10 @@ }) (define_expand "_fcmaddc__mask1" - [(match_operand:VF_AVX512FP16VL 0 "register_operand") - (match_operand:VF_AVX512FP16VL 1 "") - (match_operand:VF_AVX512FP16VL 2 "") - (match_operand:VF_AVX512FP16VL 3 "") + [(match_operand:VHF_AVX512VL 0 "register_operand") + (match_operand:VHF_AVX512VL 1 "") + (match_operand:VHF_AVX512VL 2 "") + (match_operand:VHF_AVX512VL 3 "") (match_operand: 4 "register_operand")] "TARGET_AVX512FP16 && " { @@ -6589,10 +6567,10 @@ }) (define_expand "_fcmaddc__maskz" - [(match_operand:VF_AVX512FP16VL 0 "register_operand") - (match_operand:VF_AVX512FP16VL 1 "") - (match_operand:VF_AVX512FP16VL 2 "") - (match_operand:VF_AVX512FP16VL 3 "") + [(match_operand:VHF_AVX512VL 0 "register_operand") + (match_operand:VHF_AVX512VL 1 "") + (match_operand:VHF_AVX512VL 2 "") + (match_operand:VHF_AVX512VL 3 "") (match_operand: 4 "register_operand")] "TARGET_AVX512FP16 && " { @@ -6603,20 +6581,20 @@ }) (define_expand "cmla4" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (match_operand:VF_AVX512FP16VL 2 "vector_operand") - (match_operand:VF_AVX512FP16VL 3 "vector_operand")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") 
+ (match_operand:VHF_AVX512VL 2 "vector_operand") + (match_operand:VHF_AVX512VL 3 "vector_operand")] UNSPEC_COMPLEX_F_C_MA))] "TARGET_AVX512FP16") (define_insn "fma__" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "" "%v") - (match_operand:VF_AVX512FP16VL 2 "" "") - (match_operand:VF_AVX512FP16VL 3 "" "0")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=&v") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "" "%v") + (match_operand:VHF_AVX512VL 2 "" "") + (match_operand:VHF_AVX512VL 3 "" "0")] UNSPEC_COMPLEX_F_C_MA))] "TARGET_AVX512FP16 && && " "v\t{%2, %1, %0|%0, %1, %2}" @@ -6625,54 +6603,54 @@ (set_attr "mode" "")]) (define_insn_and_split "fma__fadd_fmul" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (plus:VF_AVX512FP16VL - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (match_operand:VF_AVX512FP16VL 2 "vector_operand")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (plus:VHF_AVX512VL + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") + (match_operand:VHF_AVX512VL 2 "vector_operand")] UNSPEC_COMPLEX_FMUL) - (match_operand:VF_AVX512FP16VL 3 "vector_operand")))] + (match_operand:VHF_AVX512VL 3 "vector_operand")))] "TARGET_AVX512FP16 && flag_unsafe_math_optimizations && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) - (unspec:VF_AVX512FP16VL + (unspec:VHF_AVX512VL [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_COMPLEX_FMA))]) (define_insn_and_split "fma__fadd_fcmul" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (plus:VF_AVX512FP16VL - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (match_operand:VF_AVX512FP16VL 2 "vector_operand")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (plus:VHF_AVX512VL + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") + (match_operand:VHF_AVX512VL 2 
"vector_operand")] UNSPEC_COMPLEX_FCMUL) - (match_operand:VF_AVX512FP16VL 3 "vector_operand")))] + (match_operand:VHF_AVX512VL 3 "vector_operand")))] "TARGET_AVX512FP16 && flag_unsafe_math_optimizations && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) - (unspec:VF_AVX512FP16VL + (unspec:VHF_AVX512VL [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPEC_COMPLEX_FCMA))]) (define_insn_and_split "fma___fma_zero" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (plus:VF_AVX512FP16VL - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (match_operand:VF_AVX512FP16VL 2 "vector_operand") - (match_operand:VF_AVX512FP16VL 3 "const0_operand")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (plus:VHF_AVX512VL + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") + (match_operand:VHF_AVX512VL 2 "vector_operand") + (match_operand:VHF_AVX512VL 3 "const0_operand")] UNSPEC_COMPLEX_F_C_MA) - (match_operand:VF_AVX512FP16VL 4 "vector_operand")))] + (match_operand:VHF_AVX512VL 4 "vector_operand")))] "TARGET_AVX512FP16 && flag_unsafe_math_optimizations && ix86_pre_reload_split ()" "#" "&& 1" [(set (match_dup 0) - (unspec:VF_AVX512FP16VL + (unspec:VHF_AVX512VL [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_COMPLEX_F_C_MA))]) @@ -6690,12 +6668,12 @@ (set_attr "mode" "")]) (define_insn_and_split "fma__fmaddc_bcst" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (subreg:VF_AVX512FP16VL + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") + (subreg:VHF_AVX512VL (match_operand: 2 "bcst_vector_operand") 0) - (match_operand:VF_AVX512FP16VL 3 "vector_operand")] + (match_operand:VHF_AVX512VL 3 "vector_operand")] UNSPEC_COMPLEX_FMA))] "TARGET_AVX512FP16 && ix86_pre_reload_split ()" "#" @@ -6717,12 +6695,12 @@ }) (define_insn_and_split 
"fma__fcmaddc_bcst" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (subreg:VF_AVX512FP16VL + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") + (subreg:VHF_AVX512VL (match_operand: 2 "bcst_vector_operand") 0) - (match_operand:VF_AVX512FP16VL 3 "vector_operand")] + (match_operand:VHF_AVX512VL 3 "vector_operand")] UNSPEC_COMPLEX_FCMA))] "TARGET_AVX512FP16 && ix86_pre_reload_split ()" "#" @@ -6745,12 +6723,12 @@ }) (define_insn "___mask" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") - (vec_merge:VF_AVX512FP16VL - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v") - (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "") - (match_operand:VF_AVX512FP16VL 3 "register_operand" "0")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand" "=&v") + (vec_merge:VHF_AVX512VL + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "%v") + (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "") + (match_operand:VHF_AVX512VL 3 "register_operand" "0")] UNSPEC_COMPLEX_F_C_MA) (match_dup 1) (unspec: @@ -6763,18 +6741,18 @@ (set_attr "mode" "")]) (define_expand "cmul3" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "vector_operand") - (match_operand:VF_AVX512FP16VL 2 "vector_operand")] + [(set (match_operand:VHF_AVX512VL 0 "register_operand") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "vector_operand") + (match_operand:VHF_AVX512VL 2 "vector_operand")] UNSPEC_COMPLEX_F_C_MUL))] "TARGET_AVX512FP16") (define_insn "__" - [(set (match_operand:VF_AVX512FP16VL 0 "register_operand" "=&v") - (unspec:VF_AVX512FP16VL - [(match_operand:VF_AVX512FP16VL 1 "nonimmediate_operand" "%v") - (match_operand:VF_AVX512FP16VL 2 "nonimmediate_operand" "")] + 
[(set (match_operand:VHF_AVX512VL 0 "register_operand" "=&v") + (unspec:VHF_AVX512VL + [(match_operand:VHF_AVX512VL 1 "nonimmediate_operand" "%v") + (match_operand:VHF_AVX512VL 2 "nonimmediate_operand" "")] UNSPEC_COMPLEX_F_C_MUL))] "TARGET_AVX512FP16 && " { @@ -7016,7 +6994,7 @@ (define_expand "vec_unpacks_lo_" [(match_operand: 0 "register_operand") - (match_operand:VF_AVX512FP16VL 1 "register_operand")] + (match_operand:VHF_AVX512VL 1 "register_operand")] "TARGET_AVX512FP16" { rtx tem = operands[1]; @@ -7037,7 +7015,7 @@ (define_expand "vec_unpacks_hi_" [(match_operand: 0 "register_operand") - (match_operand:VF_AVX512FP16VL 1 "register_operand")] + (match_operand:VHF_AVX512VL 1 "register_operand")] "TARGET_AVX512FP16" { rtx tem = operands[1]; @@ -8988,7 +8966,7 @@ (define_expand "vec_unpack_fix_trunc_lo_" [(match_operand: 0 "register_operand") (any_fix: - (match_operand:VF_AVX512FP16VL 1 "register_operand"))] + (match_operand:VHF_AVX512VL 1 "register_operand"))] "TARGET_AVX512FP16" { rtx tem = operands[1]; @@ -9010,7 +8988,7 @@ (define_expand "vec_unpack_fix_trunc_hi_" [(match_operand: 0 "register_operand") (any_fix: - (match_operand:VF_AVX512FP16VL 1 "register_operand"))] + (match_operand:VHF_AVX512VL 1 "register_operand"))] "TARGET_AVX512FP16" { rtx tem = operands[1]; @@ -12262,7 +12240,7 @@ (define_insn_and_split "*vec_extract_0" [(set (match_operand: 0 "nonimmediate_operand" "=v,m,r") (vec_select: - (match_operand:VF_AVX512HFBF16 1 "nonimmediate_operand" "vm,v,m") + (match_operand:VHFBF 1 "nonimmediate_operand" "vm,v,m") (parallel [(const_int 0)])))] "TARGET_AVX512F && !(MEM_P (operands[0]) && MEM_P (operands[1]))" "#" -- cgit v1.1